/*
 * Compressed RAM based swap device
 *
 * Copyright (C) 2008, 2009  Nitin Gupta 
 *
 * This RAM based block device acts as swap disk.
 * Pages swapped to this device are compressed and
 * stored in memory.
 *
 * Released under the terms of the GNU General Public
 * License (version 2). See linux/COPYING for more information.
 *
 * Project home: http://code.google.com/p/compcache
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/mutex.h>
#include <linux/proc_fs.h>
#include <linux/string.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/vmalloc.h>

#include "compcache.h"
#include "compat.h"
#include "sub-projects/compression/lzo-kmod/lzo.h"
#include "sub-projects/allocators/xvmalloc-kmod/xvmalloc.h"

/*
 * Block device operations attached to the compcache gendisk in
 * compcache_init(). Only the owning module is recorded; no callbacks
 * are supplied here.
 */
static struct block_device_operations compcache_devops = {
	.owner = THIS_MODULE,
};

/* The single, file-global device instance used by every function below */
static struct compcache compcache;
/*
 * Requested device size in KB (module parameter / boot option).
 * When left at 0, compcache_init() derives a default from total RAM.
 */
static unsigned long compcache_size_kbytes;
#if defined(STATS)
/* Counters updated through the stat_*() macros and reported via procfs */
static struct compcache_stats stats;
#endif

#if defined(STATS)
static struct proc_dir_entry *proc;

/*
 * read_proc handler for the "compcache" proc entry.
 *
 * Formats the device size and the statistics counters into @page.
 * The whole report is produced on the first call (@off == 0); any call
 * with a non-zero offset reports end-of-file.
 *
 * @page:  output buffer supplied by procfs
 * @start: unused
 * @off:   read offset; only 0 produces output
 * @count: unused
 * @eof:   set to 1 when @off is past the start of the report
 * @data:  unused
 *
 * Returns the number of bytes written to @page.
 */
static int proc_compcache_read(char *page, char **start, off_t off,
				int count, int *eof, void *data)
{
	int len;
	size_t mem_used;
	unsigned int good_compress_perc = 0, no_compress_perc = 0;

	if (off > 0) {
		*eof = 1;
		return 0;
	}

	/*
	 * Pool memory plus the full pages holding incompressible data.
	 * Widen before shifting so the page count cannot overflow the
	 * counter's own type.
	 */
	mem_used = xvGetTotalSizeBytes(compcache.mem_pool)
			+ ((size_t)stats.pages_expand << PAGE_SHIFT);

	/* Basic stats */
	len = sprintf(page,
		"DiskSize:	%8zu kB\n",
		(size_t)(compcache.size >> (10 - SECTOR_SHIFT)));

	/*
	 * Guard on the divisor itself: pages_stored drops back to zero
	 * once every stored page has been freed, even though writes have
	 * succeeded in the past.
	 */
	if (stats.pages_stored) {
		good_compress_perc = stats.good_compress * 100
					/ stats.pages_stored;
		no_compress_perc = stats.pages_expand * 100
					/ stats.pages_stored;
	}

	/* Extended stats */
#define K(x)	((x) >> 10)
	len += sprintf(page + len,
		"NumReads:	%8llu\n"
		"NumWrites:	%8llu\n"
		"FailedReads:	%8llu\n"
		"FailedWrites:	%8llu\n"
		"InvalidIO:	%8llu\n"
		"PagesDiscard:	%8llu\n"
		"GoodCompress:	%8u %%\n"
		"NoCompress:	%8u %%\n"
		"PagesStored:	%8u\n"
		"PagesUsed:	%8zu\n"
		"OrigDataSize:	%8zu kB\n"
		"ComprDataSize:	%8zu kB\n"
		"MemUsed:	%8zu kB\n",
		stats.num_reads,
		stats.num_writes,
		stats.failed_reads,
		stats.failed_writes,
		stats.invalid_io,
		stats.pages_discard,
		good_compress_perc,
		no_compress_perc,
		stats.pages_stored,
		mem_used >> PAGE_SHIFT,
		/* pages -> kB directly, so no intermediate byte count */
		(size_t)stats.pages_stored << (PAGE_SHIFT - 10),
		(size_t)(K(stats.compr_size)),
		(size_t)(K(mem_used)));

	return len;
}
#endif	/* STATS */

/*
 * Accept only requests that describe exactly one whole, page-aligned
 * page located on the device. Returns 1 when the bio is acceptable,
 * 0 otherwise.
 */
static inline int valid_swap_request(struct bio *bio)
{
	/* Start sector must lie on the device */
	if (unlikely(bio->bi_sector >= compcache.size))
		return 0;

	/* Start sector must be the first sector of a page */
	if (unlikely(bio->bi_sector & (SECTORS_PER_PAGE - 1)))
		return 0;

	/* Exactly one segment ... */
	if (unlikely(bio->bi_vcnt != 1))
		return 0;

	/* ... covering exactly one page ... */
	if (unlikely(bio->bi_size != PAGE_SIZE))
		return 0;

	/* ... starting at the beginning of that page */
	if (unlikely(bio->bi_io_vec[0].bv_offset != 0))
		return 0;

	return 1;
}

/*
 * Release the memory backing table slot @page_no and reset the slot.
 *
 * A slot whose length is PAGE_SIZE owns a whole page (data stored
 * uncompressed); any other slot owns an object in the xvmalloc pool.
 * Statistics are adjusted to match.
 */
static void compcache_free_page(size_t page_no)
{
	typeof(compcache.table[0]) *slot = &compcache.table[page_no];

	if (likely(slot->len != PAGE_SIZE)) {
		xvFree(compcache.mem_pool, slot->pageNum, slot->offset);
		stat_dec_if_less(stats.good_compress, slot->len,
				PAGE_SIZE / 2 + 1);
	} else {
		__free_page(pfn_to_page(slot->pageNum));
		stat_dec(stats.pages_expand);
	}

	stat_dec(stats.pages_stored);
	stat_set(stats.compr_size, stats.compr_size - slot->len);

	/* Mark the slot empty */
	slot->pageNum = 0;
	slot->offset = 0;
	slot->len = 0;
}

#ifdef SWAP_DISCARD_SUPPORTED
/*
 * Discard-preparation callback registered with blk_queue_set_discard()
 * in compcache_init(). Does nothing and always returns 0; the actual
 * discard work is done in compcache_discard().
 */
static int compcache_prepare_discard(struct request_queue *q,
					struct request *req)
{
	return 0;
}

/*
 * Handle a discard bio: free every stored page in the sector range the
 * bio describes, then complete the bio successfully.
 */
static void compcache_discard(struct bio *bio)
{
	size_t idx, first, end;

	first = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
	end = first + (bio->bi_size >> (SECTOR_SHIFT + SECTORS_PER_PAGE_SHIFT));

	for (idx = first; idx < end; idx++) {
		/* Empty slots have nothing to release */
		if (!compcache.table[idx].pageNum)
			continue;

		compcache_free_page(idx);
		stat_inc(stats.pages_discard);
	}

	set_bit(BIO_UPTODATE, &bio->bi_flags);
	BIO_ENDIO(bio, 0);
}
#endif

/*
 * make_request function of the compcache queue: services one bio.
 *
 * Discard bios (when supported) are handed to compcache_discard().
 * Every other bio must pass valid_swap_request(), i.e. describe exactly
 * one page-aligned page.
 *
 * READ:  a slot that was never written yields a zero-filled page; a slot
 *        stored uncompressed is copied; anything else is decompressed
 *        with LZO into the caller's page.
 * WRITE: whatever the slot held before is freed, the page is compressed
 *        under compcache.lock, and the result is stored either in the
 *        xvmalloc pool or - when it is larger than XV_MAX_ALLOC_SIZE -
 *        uncompressed in a freshly allocated page.
 *
 * Always returns 0; success or failure is reported through the bio.
 */
static int compcache_make_request(struct request_queue *queue, struct bio *bio)
{
	int ret;
	u32 offset;
	size_t clen, page_no;
	struct page *page, *page_store;
	unsigned char *user_mem, *cmem, *src;

#ifdef SWAP_DISCARD_SUPPORTED
	if (bio_discard(bio)) {
		compcache_discard(bio);
		return 0;
	}
#endif

	if (!valid_swap_request(bio)) {
		stat_inc(stats.invalid_io);
		goto out_nomap;
	}

	page = bio->bi_io_vec[0].bv_page;
	page_no = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;

	user_mem = kmap(page);

	switch (bio_data_dir(bio)) {
	case READ:
		stat_inc(stats.num_reads);
		/*
		 * This is attempt to read before any previous write
		 * to this location. This happens due to readahead when
		 * swap device is read from user-space (e.g. during swapon)
		 */
		if (unlikely(compcache.table[page_no].pageNum == 0)) {
			pr_debug("Read before write on swap device: "
				"sector=%lu, size=%u, offset=%u\n",
				(ulong)(bio->bi_sector),
				bio->bi_size,
				bio->bi_io_vec[0].bv_offset);
			memset(user_mem, 0, PAGE_SIZE);
			kunmap(page);
			set_bit(BIO_UPTODATE, &bio->bi_flags);
			BIO_ENDIO(bio, 0);
			return 0;
		}

		/* Output capacity for the decompressor */
		clen = PAGE_SIZE;
		cmem = (unsigned char *)kmap(
				pfn_to_page(compcache.table[page_no].pageNum))
			+ compcache.table[page_no].offset;

		/* Page is stored uncompressed since it is incompressible */
		if (unlikely(compcache.table[page_no].len == PAGE_SIZE)) {
			memcpy(user_mem, cmem, PAGE_SIZE);
			kunmap(page);
			kunmap(pfn_to_page(compcache.table[page_no].pageNum));
			set_bit(BIO_UPTODATE, &bio->bi_flags);
			BIO_ENDIO(bio, 0);
			return 0;
		}

		ret = lzo1x_decompress_safe(
			cmem, compcache.table[page_no].len,
			user_mem, &clen);

		kunmap(pfn_to_page(compcache.table[page_no].pageNum));

		/* should NEVER happen */
		if (unlikely(ret != LZO_E_OK)) {
			pr_err(C "Decompression failed! "
				"err=%d, page=%zu, len=%u\n", ret, page_no,
				compcache.table[page_no].len);
			stat_inc(stats.failed_reads);
			/*
			 * Fail the bio but leave the table entry alone:
			 * clearing it would leak the stored object and
			 * drop the only reference to the data.
			 */
			goto out_unmap;
		}

		kunmap(page);
		set_bit(BIO_UPTODATE, &bio->bi_flags);
		BIO_ENDIO(bio, 0);
		return 0;

	case WRITE:
		src = compcache.compress_buffer;
		stat_inc(stats.num_writes);

		BUG_ON(compcache.table[page_no].pageNum == INVALID_PGNUM);
		/*
		 * System swaps to same sector again when the stored page
		 * is no longer referenced by any process. So, it is now safe
		 * to free the memory that was allocated for this page.
		 */
		if (compcache.table[page_no].pageNum)
			compcache_free_page(page_no);

		/* The lock protects the shared compress buffer and workmem */
		mutex_lock(&compcache.lock);
		ret = lzo1x_1_compress(user_mem, PAGE_SIZE,
			src, &clen, compcache.compress_workmem);
		if (unlikely(ret != LZO_E_OK)) {
			mutex_unlock(&compcache.lock);
			pr_err(C "Compression failed! err=%d\n", ret);
			stat_inc(stats.failed_writes);
			goto out;
		}

		/* Page is incompressible - store it as is */
		if (unlikely(clen > XV_MAX_ALLOC_SIZE)) {
			clen = PAGE_SIZE;
			src = user_mem;
			page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
			if (unlikely(!page_store)) {
				mutex_unlock(&compcache.lock);
				stat_inc(stats.failed_writes);
				goto out;
			}
			stat_inc(stats.pages_expand);
			compcache.table[page_no].pageNum = page_to_pfn(page_store);
			compcache.table[page_no].len = PAGE_SIZE;
			offset = 0;
		} else {
			if (xvMalloc(compcache.mem_pool, clen,
				&compcache.table[page_no].pageNum,
				&offset)) {
				mutex_unlock(&compcache.lock);
#if defined(VERBOSE)
				pr_info(C "Error allocating memory for compressed "
					"page: %zu, size=%zu \n", page_no, clen);
#endif
				stat_inc(stats.failed_writes);
				goto out;
			}
		}

		compcache.table[page_no].offset = offset;

		cmem = (unsigned char *)kmap(
				pfn_to_page(compcache.table[page_no].pageNum))
			+ compcache.table[page_no].offset;

		memcpy(cmem, src, clen);

		kunmap(pfn_to_page(compcache.table[page_no].pageNum));

		/*
		 * Update stats. pages_expand is counted exactly once, in the
		 * incompressible branch above, so that it stays balanced
		 * with the single decrement in compcache_free_page().
		 */
		stat_inc(stats.pages_stored);
		stat_set(stats.compr_size, stats.compr_size + clen);
		stat_inc_if_less(stats.good_compress, clen,
						PAGE_SIZE / 2 + 1);
		mutex_unlock(&compcache.lock);

		compcache.table[page_no].len = clen;

		kunmap(page);
		set_bit(BIO_UPTODATE, &bio->bi_flags);
		BIO_ENDIO(bio, 0);
		return 0;
	}
out:
	/*
	 * Write errors only: the previous contents of the slot were
	 * already freed above, so the slot is simply marked empty.
	 */
	compcache.table[page_no].pageNum = 0;
	compcache.table[page_no].offset = 0;
	compcache.table[page_no].len = 0;

out_unmap:
	kunmap(page);

out_nomap:
	BIO_IO_ERROR(bio);
	return 0;
}

static void setup_swap_header(union swap_header *s)
{
	s->info.version = 1;
	s->info.last_page = compcache.size >> SECTORS_PER_PAGE_SHIFT;
	s->info.nr_badpages = 0;
	memcpy(s->magic.magic, "SWAPSPACE2", 10);
}

/*
 * Module initialisation.
 *
 * Determines the device size (module parameter, or a percentage of total
 * RAM when none was given), allocates the compressor work areas, the page
 * table and the pre-built swap header page, then sets up the gendisk,
 * its request queue, the xvmalloc pool and (with STATS) the proc entry.
 *
 * add_disk() is deliberately the very last step: from that point on
 * requests can arrive, so everything the request path touches must
 * already exist, and nothing after it can fail. The failure path
 * therefore never has to tear down a live disk; it only drops what was
 * set up so far.
 *
 * Returns 0 on success or a negative errno.
 */
static int __init compcache_init(void)
{
	int ret;
	int pool_created = 0;
	size_t num_pages;
	struct sysinfo i;
	struct page *page;
	void *swap_header;
	struct request_queue *queue = NULL;

	mutex_init(&compcache.lock);

	if (compcache_size_kbytes == 0) {
		pr_info(C "compcache size not provided."
			" Using default: (%u%% of Total RAM).\n"
			"Use compcache_size_kbytes module param to specify"
			" custom size\n", DEFAULT_COMPCACHE_PERCENT);
		si_meminfo(&i);
		compcache_size_kbytes = ((DEFAULT_COMPCACHE_PERCENT *
				i.totalram) / 100) << (PAGE_SHIFT - 10);
	}

	/* KB -> bytes, rounded up to a whole page, then -> sectors */
	compcache.size = compcache_size_kbytes << 10;
	compcache.size = (compcache.size + PAGE_SIZE - 1) & PAGE_MASK;
	pr_info(C "Compressed swap size set to: %zu KB\n", compcache.size >> 10);
	compcache.size >>= SECTOR_SHIFT;

	compcache.compress_workmem = kmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
	if (compcache.compress_workmem == NULL) {
		pr_err(C "Error allocating compressor working memory\n");
		ret = -ENOMEM;
		goto fail;
	}

	compcache.compress_buffer = kmalloc(2 * PAGE_SIZE, GFP_KERNEL);
	if (compcache.compress_buffer == NULL) {
		pr_err(C "Error allocating compressor buffer space\n");
		ret = -ENOMEM;
		goto fail;
	}

	num_pages = compcache.size >> SECTORS_PER_PAGE_SHIFT;
	compcache.table = vmalloc(num_pages * sizeof(*compcache.table));
	if (compcache.table == NULL) {
		pr_err(C "Error allocating compcache address table\n");
		ret = -ENOMEM;
		goto fail;
	}
	memset(compcache.table, 0, num_pages * sizeof(*compcache.table));

	/* Page 0 of the device: a ready-made swap header, stored as is */
	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (page == NULL) {
		pr_err(C "Error allocating swap header page\n");
		ret = -ENOMEM;
		goto fail;
	}
	compcache.table[0].pageNum = page_to_pfn(page);
	compcache.table[0].len = PAGE_SIZE;

	swap_header = kmap(page);
	setup_swap_header((union swap_header *)(swap_header));
	kunmap(page);

	compcache.disk = alloc_disk(1);
	if (compcache.disk == NULL) {
		pr_err(C "Error allocating disk structure\n");
		ret = -ENOMEM;
		goto fail;
	}

	compcache.disk->first_minor = 0;
	compcache.disk->fops = &compcache_devops;
	/*
	 * It is named like this to prevent distro installers
	 * from offering compcache as installation target. They
	 * seem to ignore all devices beginning with 'ram'
	 */
	strcpy(compcache.disk->disk_name, "ramzswap0");

	/* Major 0 requests a dynamically allocated major number */
	ret = register_blkdev(0, compcache.disk->disk_name);
	if (ret < 0) {
		pr_err(C "Cannot register block device\n");
		goto fail;
	}
	compcache.disk->major = ret;

	/*
	 * Keep the queue in a local until just before add_disk() so the
	 * failure path can release it independently of the gendisk.
	 */
	queue = blk_alloc_queue(GFP_KERNEL);
	if (queue == NULL) {
		pr_err(C "Cannot register disk queue\n");
		ret = -ENOMEM;
		goto fail;
	}

	set_capacity(compcache.disk, compcache.size);
	blk_queue_make_request(queue, compcache_make_request);

#ifdef SWAP_DISCARD_SUPPORTED
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, queue);
	blk_queue_set_discard(queue, compcache_prepare_discard);
#endif
	blk_queue_hardsect_size(queue, PAGE_SIZE);

	compcache.mem_pool = xvCreateMemPool();
	if (compcache.mem_pool == INVALID_POOL_ID) {
		pr_err(C "Error creating memory pool\n");
		ret = -ENOMEM;
		goto fail;
	}
	pool_created = 1;

#if defined(STATS)
	/* The read handler uses the pool, so it must exist by now */
	proc = create_proc_entry("compcache", S_IRUGO, NULL);
	if (proc == NULL) {
		ret = -ENOMEM;
		pr_warning(C "Error creating proc entry\n");
		goto fail;
	}
	proc->read_proc = &proc_compcache_read;
#endif

	/* Everything is in place: make the device visible */
	compcache.disk->queue = queue;
	add_disk(compcache.disk);

	pr_debug(C "Initialization done!\n");
	return 0;

fail:
	/* The disk was never added on any path that reaches this label */
	if (pool_created)
		xvDestroyMemPool(compcache.mem_pool);

	if (queue != NULL)
		blk_cleanup_queue(queue);

	if (compcache.disk != NULL) {
		if (compcache.disk->major > 0)
			unregister_blkdev(compcache.disk->major,
					compcache.disk->disk_name);
		put_disk(compcache.disk);
	}

	if (compcache.table && compcache.table[0].pageNum)
		__free_page(pfn_to_page(compcache.table[0].pageNum));
	kfree(compcache.compress_workmem);
	kfree(compcache.compress_buffer);
	vfree(compcache.table);

	pr_err(C "Initialization failed: err=%d\n", ret);
	return ret;
}

/*
 * Module teardown.
 *
 * Order matters: the proc entry and the disk are removed first so that
 * nothing can reach the pool or the table any more, and only then is the
 * memory they refer to released. The gendisk reference and the request
 * queue are dropped as well, so that nothing allocated in
 * compcache_init() is left behind.
 */
static void __exit compcache_exit(void)
{
	size_t i, num_pages;
	struct request_queue *queue = compcache.disk->queue;

	num_pages = compcache.size >> SECTORS_PER_PAGE_SHIFT;

#if defined(STATS)
	/* The read handler uses the pool: remove it before the pool goes */
	remove_proc_entry("compcache", proc->parent);
#endif

	del_gendisk(compcache.disk);
	unregister_blkdev(compcache.disk->major, compcache.disk->disk_name);
	put_disk(compcache.disk);
	if (queue)
		blk_cleanup_queue(queue);

	/* Slot 0 holds the swap header page allocated at init time */
	__free_page(pfn_to_page(compcache.table[0].pageNum));
	kfree(compcache.compress_workmem);
	kfree(compcache.compress_buffer);

	/* Free all pages that are still in compcache */
	for (i = 1; i < num_pages; i++) {
		if (!compcache.table[i].pageNum)
			continue;

		/* A PAGE_SIZE entry owns a whole page, others a pool object */
		if (unlikely(compcache.table[i].len == PAGE_SIZE)) {
			__free_page(pfn_to_page(compcache.table[i].pageNum));
		} else {
			xvFree(compcache.mem_pool,
				compcache.table[i].pageNum,
				compcache.table[i].offset);
		}
	}

	vfree(compcache.table);
	xvDestroyMemPool(compcache.mem_pool);

	pr_debug("cleanup done!\n");
}

#ifndef MODULE
/*
 * Boot-time handler for "compcache_size_kbytes=<n>" (built-in case only):
 * stores the decimal value in compcache_size_kbytes. Always returns 1.
 */
static int __init compcache_size_setup(char *str)
{
	if (!str)
		return 1;

	compcache_size_kbytes = simple_strtoul(str, NULL, 10);
	return 1;
}

__setup("compcache_size_kbytes=", compcache_size_setup);
#endif

/* Device size can be supplied as a module parameter (in KB) */
module_param(compcache_size_kbytes, ulong, 0);
MODULE_PARM_DESC(compcache_size_kbytes, "compcache device size (in KB)");

/* Entry and exit points of the module */
module_init(compcache_init);
module_exit(compcache_exit);

/* Module information */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Nitin Gupta <nitingupta910@gmail.com>");
MODULE_DESCRIPTION("Compressed RAM Based Swap Device");
