/*********************************************************
 * Copyright (C) 2000 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation version 2 and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 *
 *********************************************************/

/*
 * os.c --
 *
 *      Wrappers for Linux system functions required by "vmmemctl".
 */

/*
 * Compile-Time Options
 *
 * OS_DISABLE_UNLOAD: when non-zero, vmballoon_init() takes an extra
 *                    reference on this module to prevent it from being
 *                    unloaded.
 * OS_DEBUG:          when non-zero, vmballoon_init() logs the pid of the
 *                    polling kernel thread at KERN_DEBUG level.
 */

#define	OS_DISABLE_UNLOAD 0
#define	OS_DEBUG          1

/*
 * Includes
 */

#include "driver-config.h"

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/timer.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#if defined(CONFIG_PROC_FS) || defined (CONFIG_DEBUG_FS)
#include <linux/stat.h>
#include <linux/seq_file.h>
#endif /* CONFIG_PROC_FS || CONFIG_DEBUG_FS*/

#include "compat_sched.h"

#include <asm/uaccess.h>
#include <asm/page.h>

#include "vmmemctl_version.h"
#include "os.h"
#include "vmballoon.h"


/*
 * Constants
 */

/*
 * Page allocation flags used by OS_ReservedPageAlloc() when the caller
 * cannot sleep.
 *
 * Use __GFP_HIGHMEM to allow pages from the HIGHMEM zone. We don't
 * allow waiting (__GFP_WAIT) for NOSLEEP page allocations. Use
 * __GFP_NOWARN to suppress page allocation failure warnings.
 */
#define OS_PAGE_ALLOC_NOSLEEP	(__GFP_HIGHMEM|__GFP_NOWARN)

/*
 * Allocation flags used by OS_Malloc().
 *
 * GFP_ATOMIC allocations dig deep for free pages. That is probably
 * acceptable because the balloon driver uses OS_Malloc() to allocate
 * only a few bytes at a time, so a new page is required only occasionally.
 * Still, if the __GFP_NOMEMALLOC flag is available, use it to tell
 * the guest's page allocator not to use emergency pools.
 */
#ifdef __GFP_NOMEMALLOC
#define OS_KMALLOC_NOSLEEP	(GFP_ATOMIC|__GFP_NOMEMALLOC|__GFP_NOWARN)
#else
#define OS_KMALLOC_NOSLEEP	(GFP_ATOMIC|__GFP_NOWARN)
#endif

/*
 * Page allocation flags used by OS_ReservedPageAlloc() when the caller
 * can sleep.
 *
 * Use GFP_HIGHUSER when executing in a separate kernel thread
 * context and allocation can sleep.  This is less stressful to
 * the guest memory system, since it allows the thread to block
 * while memory is reclaimed, and won't take pages from emergency
 * low-memory pools.
 */
#define	OS_PAGE_ALLOC_CANSLEEP	(GFP_HIGHUSER)

/*
 * Globals
 */

/*
 * Kernel thread running vmballoon_poll_loop(). It is created in
 * vmballoon_init() and stopped in vmballoon_exit().
 */
static struct task_struct *vmballoon_task;

/*
 *-----------------------------------------------------------------------------
 *
 * OS_Malloc --
 *
 *      Allocate "size" bytes of kernel memory with kmalloc(), using the
 *      OS_KMALLOC_NOSLEEP allocation flags.
 *
 * Results:
 *      On success: Pointer to the allocated memory
 *      On failure: NULL
 *
 * Side effects:
 *      None
 *
 *-----------------------------------------------------------------------------
 */

void *
OS_Malloc(size_t size) // IN: Number of bytes to allocate
{
   void *mem = kmalloc(size, OS_KMALLOC_NOSLEEP);

   return mem;
}


/*
 *-----------------------------------------------------------------------------
 *
 * OS_Free --
 *
 *      Release kernel memory by handing the pointer to kfree().
 *
 * Results:
 *      None
 *
 * Side effects:
 *      None
 *
 *-----------------------------------------------------------------------------
 */

void
OS_Free(void *ptr,   // IN: Memory to release
        size_t size) // IN: Unused by this implementation
{
   /* The size is part of the OS-independent interface only. */
   (void)size;

   kfree(ptr);
}


/*
 *-----------------------------------------------------------------------------
 *
 * OS_MemZero --
 *
 *      Set the first "size" bytes starting at "ptr" to zero.
 *
 * Results:
 *      None
 *
 * Side effects:
 *      None
 *
 *-----------------------------------------------------------------------------
 */

void
OS_MemZero(void *ptr,   // OUT: Start of the region to clear
           size_t size) // IN: Number of bytes to clear
{
   (void)memset(ptr, 0, size);
}


/*
 *-----------------------------------------------------------------------------
 *
 * OS_MemCopy --
 *
 *      Copy "size" bytes from "src" to "dest" using memcpy(). As with
 *      memcpy(), the two regions are expected not to overlap.
 *
 * Results:
 *      None
 *
 * Side effects:
 *      None
 *
 *-----------------------------------------------------------------------------
 */

void
OS_MemCopy(void *dest,      // OUT: Destination buffer
           const void *src, // IN: Source buffer
           size_t size)     // IN: Number of bytes to copy
{
   (void)memcpy(dest, src, size);
}


/*
 *-----------------------------------------------------------------------------
 *
 * OS_ReservedPageGetLimit --
 *
 *      Predict the maximum achievable balloon size.
 *
 *      In 2.4.x and 2.6.x kernels, the balloon driver can guess the number of pages
 *      that can be ballooned. But, for now let us just pass the totalram-size as the
 *      maximum achievable balloon size. Note that normally (unless guest kernel is
 *      booted with a mem=XX parameter) the totalram-size is equal to alloc.max.
 *
 * Results:
 *      The maximum achievable balloon size in pages.
 *
 * Side effects:
 *      None
 *
 *-----------------------------------------------------------------------------
 */

unsigned long
OS_ReservedPageGetLimit(void)
{
   struct sysinfo info;

   /*
    * si_meminfo() is cheap. Moreover, we want to provide dynamic
    * max balloon size later. So let us call si_meminfo() every
    * iteration.
    */
   si_meminfo(&info);

   /* info.totalram is in pages */
   return info.totalram;
}


/*
 *-----------------------------------------------------------------------------
 *
 * OS_ReservedPageGetPA --
 *
 *      Translate a page handle (of a physical page previously reserved with
 *      OS_ReservedPageAlloc()) into its physical address. The handle is
 *      interpreted as a struct page pointer; its page frame number is
 *      converted with PPN_2_PA().
 *
 * Results:
 *      The pa.
 *
 * Side effects:
 *      None.
 *
 *-----------------------------------------------------------------------------
 */

PA64
OS_ReservedPageGetPA(PageHandle handle) // IN: A valid page handle
{
   return PPN_2_PA(page_to_pfn((struct page *)handle));
}


/*
 *-----------------------------------------------------------------------------
 *
 * OS_ReservedPageGetHandle --
 *
 *      Translate a pa (of a physical page previously reserved with
 *      OS_ReservedPageAlloc()) back into a page handle. The pa is reduced
 *      to a page number with PA_2_PPN() and looked up with pfn_to_page().
 *
 * Results:
 *      The page handle.
 *
 * Side effects:
 *      None.
 *
 *-----------------------------------------------------------------------------
 */

PageHandle
OS_ReservedPageGetHandle(PA64 pa)     // IN
{
   struct page *page = pfn_to_page(PA_2_PPN(pa));

   return (PageHandle)page;
}


/*
 *-----------------------------------------------------------------------------
 *
 * OS_ReservedPageAlloc --
 *
 *      Reserve a physical page for the exclusive use of this driver.
 *
 * Results:
 *      On success: A valid page handle that can be passed to OS_ReservedPageGetPA()
 *                  or OS_ReservedPageFree().
 *      On failure: PAGE_HANDLE_INVALID
 *
 * Side effects:
 *      None.
 *
 *-----------------------------------------------------------------------------
 */

PageHandle
OS_ReservedPageAlloc(int canSleep) // IN
{
   struct page *page;

   page = alloc_page(canSleep ? OS_PAGE_ALLOC_CANSLEEP : OS_PAGE_ALLOC_NOSLEEP);
   if (page == NULL) {
      return PAGE_HANDLE_INVALID;
   }

   return (PageHandle)page;
}


/*
 *-----------------------------------------------------------------------------
 *
 * OS_ReservedPageFree --
 *
 *      Give back a physical page previously reserved with
 *      OS_ReservedPageAlloc(). The handle is interpreted as a struct page
 *      pointer and released with __free_page().
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      None.
 *
 *-----------------------------------------------------------------------------
 */

void
OS_ReservedPageFree(PageHandle handle) // IN: A valid page handle
{
   __free_page((struct page *)handle);
}

/*
 *-----------------------------------------------------------------------------
 *
 * OS_Yield --
 *
 *      Yield the CPU, if needed, by calling cond_resched().
 *
 * Results:
 *      None
 *
 * Side effects:
 *      This thread might get descheduled, other threads might get scheduled.
 *
 *-----------------------------------------------------------------------------
 */

void
OS_Yield(void)
{
   /* Gives the scheduler a chance to run another task; may return at once. */
   cond_resched();
}


/*
 *-----------------------------------------------------------------------------
 *
 * OS_MapPageHandle --
 *
 *      Map the single page behind a page handle into kernel address space
 *      with vmap() (VM_MAP, PAGE_KERNEL) and return the resulting mapping.
 *
 * Results:
 *      The mapping. The value returned by vmap() is passed through without
 *      being checked here.
 *
 * Side effects:
 *      None.
 *
 *-----------------------------------------------------------------------------
 */

Mapping
OS_MapPageHandle(PageHandle handle)     // IN
{
   struct page *pages[1] = { (struct page *)handle };
   void *addr = vmap(pages, 1, VM_MAP, PAGE_KERNEL);

   return (Mapping)addr;
}


/*
 *-----------------------------------------------------------------------------
 *
 * OS_Mapping2Addr --
 *
 *      Return the address of a previously mapped page handle (with
 *      OS_MapPageHandle). The mapping value itself is the address, so this
 *      is only a cast.
 *
 * Results:
 *      The mapping address.
 *
 * Side effects:
 *      None.
 *
 *-----------------------------------------------------------------------------
 */

void *
OS_Mapping2Addr(Mapping mapping)        // IN
{
   void *addr = (void *)mapping;

   return addr;
}


/*
 *-----------------------------------------------------------------------------
 *
 * OS_UnmapPage --
 *
 *      Tear down a mapping previously created for a page handle by passing
 *      its address to vunmap().
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      None.
 *
 *-----------------------------------------------------------------------------
 */

void
OS_UnmapPage(Mapping mapping)   // IN
{
   void *addr = (void *)mapping;

   vunmap(addr);
}


/*
 *-----------------------------------------------------------------------------
 *
 * vmballoon_poll_loop --
 *
 *      Body of the balloon kernel thread. Each iteration sleeps for up to
 *      BALLOON_POLL_PERIOD * HZ jiffies (the sleep ends early when a freeze
 *      or a stop of the thread is requested), then calls into the common
 *      balloon code (Balloon_QueryAndExecute) to fetch the new ballooning
 *      target and adjust the balloon size accordingly. The loop ends once
 *      kthread_should_stop() reports true.
 *
 * Results:
 *      Always 0.
 *
 * Side effects:
 *      None
 *
 *-----------------------------------------------------------------------------
 */

static int
vmballoon_poll_loop(void *unused)
{
   static wait_queue_head_t pollWaitq;

   init_waitqueue_head(&pollWaitq);

   /* Let the kernel freeze this thread during OS suspend/hibernate */
   compat_set_freezable();

   for (;;) {
      /* wait out one poll period, unless asked to freeze or stop */
      wait_event_interruptible_timeout(pollWaitq,
                                       compat_wait_check_freezing() ||
                                       kthread_should_stop(),
                                       BALLOON_POLL_PERIOD * HZ);
      compat_try_to_freeze();

      if (kthread_should_stop()) {
         return 0;
      }

      /* run one round of the common balloon logic */
      Balloon_QueryAndExecute();
   }
}


#if defined(CONFIG_PROC_FS) || defined(CONFIG_DEBUG_FS)
/*
 *-----------------------------------------------------------------------------
 *
 * vmballoon_stats_show --
 *
 *      seq_file "show" callback. Formats the counters returned by
 *      Balloon_GetStats() as text: balloon size, allocation/free rates and
 *      per-operation counters with their failure counts.
 *
 * Results:
 *      Always 0.
 *
 * Side effects:
 *      Text is appended to the seq_file "f".
 *
 *-----------------------------------------------------------------------------
 */

static int
vmballoon_stats_show(struct seq_file *f,  // IN
                     void *data)          // IN: Unused
{
   const BalloonStats *st = Balloon_GetStats();

   /* balloon size: target and current */
   seq_printf(f,
              "target:             %8d pages\n"
              "current:            %8d pages\n",
              st->nPagesTarget, st->nPages);

   /* allocation and free rates */
   seq_printf(f,
              "rateNoSleepAlloc:   %8d pages/sec\n"
              "rateSleepAlloc:     %8d pages/sec\n"
              "rateFree:           %8d pages/sec\n",
              st->rateNoSleepAlloc, st->rateAlloc, st->rateFree);

   /* operation counters, with failure counts where they are tracked */
   seq_printf(f,
              "\n"
              "timer:              %8u\n"
              "start:              %8u (%4u failed)\n"
              "guestType:          %8u (%4u failed)\n"
              "lock:               %8u (%4u failed)\n"
              "unlock:             %8u (%4u failed)\n"
              "target:             %8u (%4u failed)\n"
              "primNoSleepAlloc:   %8u (%4u failed)\n"
              "primCanSleepAlloc:  %8u (%4u failed)\n"
              "primFree:           %8u\n"
              "errAlloc:           %8u\n"
              "errFree:            %8u\n",
              st->timer,
              st->start,
              st->startFail,
              st->guestType,
              st->guestTypeFail,
              st->lock,
              st->lockFail,
              st->unlock,
              st->unlockFail,
              st->target,
              st->targetFail,
              st->primAlloc[BALLOON_PAGE_ALLOC_NOSLEEP],
              st->primAllocFail[BALLOON_PAGE_ALLOC_NOSLEEP],
              st->primAlloc[BALLOON_PAGE_ALLOC_CANSLEEP],
              st->primAllocFail[BALLOON_PAGE_ALLOC_CANSLEEP],
              st->primFree,
              st->primErrorPageAlloc,
              st->primErrorPageFree);

   return 0;
}

/*
 * Open handler shared by the proc and debugfs file operations: binds
 * vmballoon_stats_show() to the file through single_open(), with no
 * private data.
 */
static int
vmballoon_stats_open(struct inode *inode, // IN: Unused
                     struct file *file)   // IN
{
   int status = single_open(file, vmballoon_stats_show, NULL);

   return status;
}

#endif

#ifdef CONFIG_PROC_FS

#include <linux/proc_fs.h>

static struct file_operations vmballoon_proc_fops = {
   .open = vmballoon_stats_open,
   .read = seq_read,
   .llseek = seq_lseek,
   .release = single_release,
};

/*
 * Create the read-only "vmmemctl" proc entry and attach
 * vmballoon_proc_fops to it. A failure to create the entry is silently
 * ignored; the driver then simply runs without the proc statistics file.
 */
static void
vmballoon_procfs_init(void)
{
   struct proc_dir_entry *entry;

   entry = create_proc_entry("vmmemctl", S_IFREG | S_IRUGO, NULL);
   if (entry == NULL) {
      return;
   }

   entry->proc_fops = &vmballoon_proc_fops;
}

/*
 * Remove the "vmmemctl" proc entry registered by vmballoon_procfs_init().
 * The call is made unconditionally, whether or not the entry was created.
 */
static void
vmballoon_procfs_exit(void)
{
   remove_proc_entry("vmmemctl", NULL);
}

#else

/* No-op stand-in used when CONFIG_PROC_FS is not enabled. */
static void
vmballoon_procfs_init(void)
{
}

/*
 * No-op stand-in used when CONFIG_PROC_FS is not enabled.
 *
 * The name has to match the vmballoon_procfs_exit() call made by
 * vmballoon_exit(); a misspelled stub would leave that call without a
 * definition in builds without procfs.
 */
static void
vmballoon_procfs_exit(void)
{
}

#endif /* CONFIG_PROC_FS */

#ifdef CONFIG_DEBUG_FS

#include <linux/debugfs.h>

/*
 * File operations for the debugfs "vmmemctl" entry, built on the seq_file
 * helpers.
 *
 * This object is deliberately not const: early versions of
 * debugfs_create_file() took a non-const fops argument.
 */
static struct file_operations vmballoon_debug_fops = {
   .open    = vmballoon_stats_open,
   .release = single_release,
   .read    = seq_read,
   .llseek  = seq_lseek,
   .owner   = THIS_MODULE,
};


/*
 * Dentry of the debugfs "vmmemctl" file. vmballoon_debugfs_init() resets
 * it to NULL when creation reports an error, and vmballoon_debugfs_exit()
 * only removes the entry when it is non-NULL.
 */
static struct dentry *vmballoon_dbg_entry;

static void vmballoon_debugfs_init(void)
{
   vmballoon_dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, NULL,
                                             &vmballoon_debug_fops);
   if (IS_ERR(vmballoon_dbg_entry)) {
      int error = PTR_ERR(vmballoon_dbg_entry);
      printk(KERN_ERR "vmmemctl: failed to create debugfs entry, error: %d\n", error);
      vmballoon_dbg_entry = NULL;
   }
}

/*
 * Remove the debugfs "vmmemctl" file, if vmballoon_debugfs_init() left a
 * non-NULL entry behind.
 */
static void
vmballoon_debugfs_exit(void)
{
   if (!vmballoon_dbg_entry) {
      return;
   }

   debugfs_remove(vmballoon_dbg_entry);
}


#else

/* No-op stand-in used when CONFIG_DEBUG_FS is not enabled. */
static void
vmballoon_debugfs_init(void)
{
}

/* No-op stand-in used when CONFIG_DEBUG_FS is not enabled. */
static void
vmballoon_debugfs_exit(void)
{
}

#endif /* CONFIG_DEBUG_FS */

/*
 *-----------------------------------------------------------------------------
 *
 * vmballoon_init --
 *
 *      Called at driver startup, initializes the balloon state and structures.
 *
 * Results:
 *      On success: 0
 *      On failure: standard error code
 *
 * Side effects:
 *      None
 *
 *-----------------------------------------------------------------------------
 */

static int
vmballoon_init(void)
{
   /* initialize global state */
   if (!Balloon_Init(BALLOON_GUEST_LINUX)) {
      return -EIO;
   }

   /* create kernel thread */
   vmballoon_task = kthread_run(vmballoon_poll_loop, NULL, BALLOON_NAME);
   if (IS_ERR(vmballoon_task)) {
      int error = PTR_ERR(vmballoon_task);
      printk(KERN_WARNING BALLOON_NAME
             ": unable to create kernel thread, error: %d\n", error);
      Balloon_Cleanup();
      return error;
   }

   if (OS_DEBUG) {
      printk(KERN_DEBUG BALLOON_NAME ": started kernel thread pid=%d\n",
             vmballoon_task->pid);
   }

   vmballoon_procfs_init();
   vmballoon_debugfs_init();

   /* prevent module unload with extra reference */
   if (OS_DISABLE_UNLOAD) {
      try_module_get(THIS_MODULE);
   }

   /* log device load */
   printk(KERN_INFO BALLOON_NAME_VERBOSE " initialized\n");

   return 0;
}
module_init(vmballoon_init);


/*
 *-----------------------------------------------------------------------------
 *
 * vmballoon_exit --
 *
 *      Called when the driver is terminating, cleanup initialized structures.
 *      Teardown runs in this order: the proc and debugfs entries are
 *      removed, the polling thread is stopped, and finally the common
 *      balloon state is cleaned up with Balloon_Cleanup().
 *
 * Results:
 *      None
 *
 * Side effects:
 *      None
 *
 *-----------------------------------------------------------------------------
 */

static void
vmballoon_exit(void)
{
   /* take the statistics files away before the state behind them goes */
   vmballoon_procfs_exit();
   vmballoon_debugfs_exit();

   /* ask the thread started in vmballoon_init() to stop */
   kthread_stop(vmballoon_task);

   /*
    * NOTE(review): presumably this must follow kthread_stop() so that the
    * poll loop no longer calls Balloon_QueryAndExecute() -- confirm.
    */
   Balloon_Cleanup();

   /* log device unload */
   printk(KERN_INFO BALLOON_NAME_VERBOSE " unloaded\n");
}
module_exit(vmballoon_exit);

/* Module information. */
MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Memory Control Driver");
MODULE_LICENSE("GPL v2");
/* Version string; presumably supplied by vmmemctl_version.h. */
MODULE_VERSION(VMMEMCTL_DRIVER_VERSION_STRING);
/* Additional name under which the module can be requested. */
MODULE_ALIAS("vmware_vmmemctl");
/*
 * Starting with SLE10sp2, Novell requires that IHVs sign a support agreement
 * with them and mark their kernel modules as externally supported via a
 * change to the module header. If this isn't done, the module will not load
 * by default (i.e., neither mkinitrd nor modprobe will accept it).
 */
MODULE_INFO(supported, "external");
