aboutsummaryrefslogtreecommitdiff
path: root/unix/boot/vmcached
diff options
context:
space:
mode:
authorJoe Hunkeler <jhunkeler@gmail.com>2015-08-11 16:51:37 -0400
committerJoe Hunkeler <jhunkeler@gmail.com>2015-08-11 16:51:37 -0400
commit40e5a5811c6ffce9b0974e93cdd927cbcf60c157 (patch)
tree4464880c571602d54f6ae114729bf62a89518057 /unix/boot/vmcached
downloadiraf-osx-40e5a5811c6ffce9b0974e93cdd927cbcf60c157.tar.gz
Repatch (from linux) of OSX IRAF
Diffstat (limited to 'unix/boot/vmcached')
-rw-r--r--unix/boot/vmcached/README17
-rw-r--r--unix/boot/vmcached/notes364
-rw-r--r--unix/boot/vmcached/vmcache.c1566
-rw-r--r--unix/boot/vmcached/vmcache.h19
-rw-r--r--unix/boot/vmcached/vmcached.c568
5 files changed, 2534 insertions, 0 deletions
diff --git a/unix/boot/vmcached/README b/unix/boot/vmcached/README
new file mode 100644
index 00000000..6844153c
--- /dev/null
+++ b/unix/boot/vmcached/README
@@ -0,0 +1,17 @@
+VMCACHED -- VMcache daemon.
+
+The VMcache daemon is a Unix server which manages a file cache in virtual
+memory. This is used to optimize virtual memory usage, allowing files to
+be cached in memory so that they can be shared or accessed without going
+to disk. It is also possible to conditionally access files via "direct
+i/o", bypassing system virtual memory and transferring the data directly
+from disk to or from process memory.
+
+NOTE: as of Dec 2001, the Vmcache library and vmcached have been updated
+to provide the capabilities described above. The daemon runs, and was used
+to develop the VM client interface, which is currently functional, tested,
+and installed in os$zfiobf.c. The new version of the VMcache library
+however, has not yet been fully tested and should not be used.
+
+Since this code is still under development it is not part of the normal
+IRAF build (hence no mkpkg or mkpkg.sh).
diff --git a/unix/boot/vmcached/notes b/unix/boot/vmcached/notes
new file mode 100644
index 00000000..f5da300b
--- /dev/null
+++ b/unix/boot/vmcached/notes
@@ -0,0 +1,364 @@
+Virtual Memory Caching Scheme
+Mon Oct 25 1999 - Thu Jan 20 2000
+
+
+OVERVIEW [now somewhat dated]
+
+Most modern Unix systems implement ordinary file i/o by mapping files into
+host memory, faulting the file pages into memory, and copying data to and
+from process memory and the cached file pages. This has the effect of
+caching recently read file data in memory. This scheme replaces the old
+Unix buffer cache, with the advantage that there is no builtin limit on
+the size of the cache. The global file cache is shared by both data files
+and the file pages of executing programs, and will grow until all physical
+memory is in use.
+
+The advantage of the virtual memory file system (VMFS) is that it makes
+maximal use of system memory for caching file data. If a relatively static
+set of data is repeatedly accessed it will remain in the system file cache,
+speeding access and minimizing i/o and page faulting. The disadvantage
+is the same thing: VMFS makes maximal use of system memory for caching
+file data. Programs which do heavy file i/o, reading a large amount of
+data, fault in a great deal of file data pages which may only be accessed
+once. Once the free list is exhausted the system page daemon runs to
+reclaim old file pages for reuse. The system pages heavily and becomes
+inefficient.
+
+The goal of the file caching scheme presented here is to continue to cache
+file data in the global system file cache, but control how data is cached to
+minimize use of the pageout daemon which runs when memory is exhausted. This
+scheme makes use of the ** existing operating system kernel facilities **
+to cache the file data and use the cached data for general file access.
+The trick is to try to control how data is loaded into the cache, and when
+it is removed from the cache, so that cache space is reused efficiently
+without invoking the system pageout daemon. Since data is cached by the
+system the cache benefits all programs which access the cached file data,
+without requiring that the programs explicitly use any cache facilities
+such as a custom library.
+
+
+HOW IT WORKS
+
+
+INTERFACE
+
+
+ vm = vm_initcache (initstr)
+ vm_closecache (vm)
+
+ vm_cachefile (vm, fname, flags)
+ vm_cachefd (vm, fd, flags)
+ vm_uncachefile (vm, fname)
+ vm_uncachefd (vm, fd)
+
+ vm_cacheregion (vm, fd, offset, nbytes, flags)
+ vm_uncacheregion (vm, fd, offset, nbytes)
+ vm_reservespace (vm, nbytes)
+ vm_sync (vm, fd)
+
+
+vm_cacheregion (vm, fd, offset, nbytes, flags)
+
+ check whether the indicated region is mapped (vm descriptor)
+ if not, free space from the tail of the cache; map new region
+ request that mapped region be faulted into memory (madvise)
+ move referenced file to head of cache
+
+ redundant requests are harmless, but will reload any missing pages,
+ and cause the file to again be moved to the head of the cache list
+
+ may need to scan the cache periodically to make adjustments for
+ files that have changed in size, or been deleted, while still in
+ the cache
+
+ cached regions may optionally be locked into memory until freed
+
+ the cache controller may function either as a library within a process,
+ or as a cache controller server process shared by multiple processes
+
+
+vm_uncacheregion (vm, fd, offset, nbytes)
+
+ check whether the indicated region is mapped
+ if so, unmap the pages
+ if no more pages remain mapped, remove file from cache list
+
+
+vm_reservespace (vm, nbytes)
+
+ unmap file segments from tail of list until the requested space
+ (plus some extra space) is available for reuse
+
+
+data structures
+
+ caching mechanism is file-oriented
+ linked list of mapped regions (each from a file)
+ for each region keep track of file descriptor, offset, size
+ linked list of file descriptors
+ for each file keep track of file size, mtime,
+ type of mapping (full,region) and so on
+
+    some dynamic things such as the size of a file or whether pages are memory
+ resident can only be determined by querying the system at runtime
+
+
+
+Solaris VM Interface
+
+ madvise (addr, len, advice)
+ mmap (addr, len, prot, flags, fildes, off)
+ munmap (addr, len)
+ mlock (addr, len)
+ munlock (addr, len)
+ memcntl (addr, len, cmd, arg, attr, mask)
+ mctl (addr, len, function, arg)
+ mincore (addr, len, *vec)
+ msync (addr, len, flags)
+
+ Notes
+ Madvise can be used to request that a range of pages be faulted
+ into memory (WILL_NEED), or freed from memory (DONT_NEED)
+
+ Mctl can be used to invalidate page mappings in a region
+
+ Mincore can be used to determine if pages in a given address range
+ are resident in memory
+
+
+
+VMCACHED -- December 2001
+------------------------------
+
+Added VMcache daemon and IRAF interface to same
+Design notes follow
+
+
+Various Cache Control Algorithms
+
+ 1. No Cache
+
+ No VMcache daemon. Clients use their builtin default i/o mechanism,
+ e.g., either normal or direct i/o depending upon the file size.
+
+ 2. Manually or externally controlled cache
+
+ Files are cached only when directed. Clients connect to the cache
+ daemon to see if files are in the cache and if so use normal VM i/o
+ to access data in the cache. If the file is not cached the client
+ uses its default i/o mechanism, e.g., direct i/o.
+
+ 3. LRU Cache
+
+ A client file access causes the accessed file to be cached. Normal
+ VM i/o is used for file i/o. As new files are cached the space
+ used by the least recently used files is reclaimed. Accessing a
+ file moves it to the head of the cache, if it is still in the cache.
+ Otherwise it is reloaded.
+
+ 4. Adaptive Priority Cache
+
+ This is like the LRU cache, but the cache keeps statistics on files
+ whether or not they have aged out of the cache, and raises the
+ cache priority or lifetime of files that are more frequently
+ accessed. Files that are only accessed once tend to pass quickly
+ through the cache, or may not even be cached until the second
+ access. Files that are repeatedly accessed have a higher priority
+ and will tend to stay in the cache.
+
+The caching mechanism and algorithm used are independent of the client
+programs, hence can be easily tuned or replaced with a different algorithm.
+
+Factors determining if a file is cached:
+
+ user-assigned priority (0=nocache; 1-N=cache priority)
+ number of references
+ time since last access (degrades nref)
+ amount of available memory (cutoff point)
+
+Cache priority
+
+ priority = userpri * max(0,
+ (nref-refbase - ((time - last_access) / tock)) )
+
+Tunable parameters
+
+ userpri User defined file priority. Files with a higher
+ priority stay in the cache longer. A zero priority
+ prevents a file from being cached.
+
+ refbase The number of file references has to exceed refbase
+ before the file will be cached. For example, if
+ refbase=0 the file will be cacheable on the first
+ reference. If refbase=1 a file will only become
+ cacheable if accessed two or more times. Refbase
+ can be used to exclude files from the cache that
+ are only referenced once and hence are not worth
+ caching.
+
+ tock While the number of accesses increases the cache
+ priority of a file, the time interval since the
+ last access likewise decreases the cache priority
+ of the file. A time interval of "tock" seconds
+ will cancel out one file reference. In effect,
+ tock=N means that a file reference increases the
+ cache priority of a file for N seconds. A
+ frequently referenced file will be relatively
+ unaffected by tock, but tock will cause
+ infrequently referenced files to age out of the
+ cache within a few tocks.
+
+Cache Management
+
+ Manual cache control
+
+ Explicitly caching or refreshing a file always maps the file into
+ memory and moves it to the head of the cache.
+
+ File access
+
+ Accessing a file (vm_accessfile) allows cache optimization to
+ occur. The file nref and access time are updated and the priority
+ of the current file and all files (to a certain depth in the cache
+ list) are recomputed. If a whole-file level access is being
+ performed the file size is examined to see if it has changed and
+ if the file has gotten larger a new segment is created. The
+ segment descriptor is then unlinked and relinked in the cache in
+ cache priority order. If the segment is above the VM cutoff it
+ is loaded into the cache: lower priority segments are freed as
+ necessary, and if the file is an existing file it is marked
+ WILL_NEED to queue the file data to be read into memory.
+
+ If the file is a new file it must already have been created
+ externally to be managed under VMcache. The file size at access
+ time will determine the size of the file entry in the cache. Some
+ systems (BSD, Sun) allow a mmap to extend beyond the end of a
+ file, but others (Linux) do not. To reserve space for a large
+ file where the ultimate size of the file is known in advance, one
+ can write a byte where the last byte of the file will be (as with
+ zfaloc in IRAF) before caching the file, and the entire memory
+ space will be reserved in advance. If a file is cached and later
+ extended, re-accessing the file will automatically cache the new
+ segment of the file (see above).
+
+ Data structures
+
+ Segment descriptors
+ List of segments linked in memory allocation order
+ first N segments are cached (whatever will fit)
+ remainder are maintained in list, but are not cached
+ manually cached/refreshed segments go to head of list
+ accessed files are inserted in list based on priority
+ List of segments belonging to the same file
+ a file can be stored in the cache in multiple segments
+
+ File hash table
+ provides fast lookup of an individual file
+ hash dev+ino to segment
+ segment points to next segment if collision occurs
+ only initial/root file segment is indexed
+
+ Cache management
+
+ Relinking of the main list occurs only in certain circumstances
+ when a segment is manually cached/uncached/refreshed
+ referenced segment moves to head of list
+ new segment is always cached
+ when a file or segment is accessed
+ priority of each element is computed and segment is
+ placed in priority order (only referenced segment is moved)
+ caching/uncaching may occur due to new VM cutoff
+ when a new segment is added
+ when an old segment is deleted
+ Residency in memory is determined by link order
+ priority normally determines memory residency
+ but manual caching will override (for a time)
+
+
+File Driver Issues
+
+ Image kernels
+
+ Currently only OIF uses the SF driver. FXF, STF, and QPF (FMIO)
+ all use the BF driver. Some or all could be changed to use SF
+	if it is made compatible with BF, otherwise the VM hooks need
+ to go into the BF driver. Since potentially any large file can
+ be cached, putting the VM support into BF is a reasonable option.
+
+ The FITS kernel is a problem currently as it violates device
+ block size restrictions, using a block size of 2880.
+
+ It is always a good idea to use falloc to pre-allocate storage for
+ a large imagefile when the size is known in advance. This permits
+ the VM system to reserve VM space for a new image before data is
+ written to the file.
+
+ Direct I/O
+
+ Direct i/o is possible only if transfers are aligned on device
+ blocks and are an integral number of blocks in length.
+
+ Direct i/o flushes any VM buffered data for the file. If a file
+ is mapped into memory this is not possible, hence direct i/o is
+ disabled for a file while it is mapped into memory.
+
+ This decision is made at read/write time, hence cannot be
+ determined reliably when a file is opened.
+
+ FITS Kernel
+
+ Until the block size issues can be addressed, direct i/o cannot
+ be used for FITS images. Some VM cache control is still possible
+ however. Options include:
+
+ o Always cache a .fits image: either set vmcached to cache a file
+ on the first access, or adjust the cache parameters based on
+ the file type. Use a higher priority for explicitly cached
+ files (e.g. Mosaic readouts), so that running a sequence of
+ normal i/o images through the cache does not flush the high
+ priority images.
+
+ o Writing to new files which have not been pre-allocated is
+ problematic as a large amount of data can be written, causing
+ paging. One way to deal with this is to use large transfers
+ (IMIO will already do this), and to issue a reservespace
+ directive on each file write at EOF, to free up VM space as
+ needed. The next access directive would cause the new
+ portion of the image to be mapped into the cache.
+
+ A possible problem with this is that the new file may initially
+ be too small to reach the cache threshold. Space could be
+ reserved in any case, waiting for the next access to cache
+ the file; the cache daemon could always cache new files of a
+ certain type; or the file could be cached when it reaches the
+ cache threshold.
+
+ Kernel File Driver
+
+ A environment variable will be used in the OS driver to define a
+ cache threshold or to disable use of VMcache entirely. We need
+ to be able to specify these two things separately. If a cache
+ threshold is set, files smaller than this size will not result in
+ a query to the cache daemon. If there is no cache threshold but
+ VMcache is enabled, the cache daemon will decide whether the file
+ is too small to be cached. It should also be possible to force
+ the use of direct i/o if the file is larger than a certain size.
+
+ Kernel file driver parameters:
+
+ enable boolean
+
+ vmcache Use vmcache only if the file size equals or exceeds
+ the specified threshold.
+
+ directio If the file size equals or exceeds the specified
+ threshold use direct i/o to access the file. If
+ direct i/o is enabled in this fashion then vmcache
+ is not used (otherwise vmcache decides whether to
+ use direct i/o for a file).
+
+ port Socket number to be used.
+
+ VMPORT=8797
+ VMCLIENT=enable,threshold=10m,directio=10m
+
diff --git a/unix/boot/vmcached/vmcache.c b/unix/boot/vmcached/vmcache.c
new file mode 100644
index 00000000..a072951f
--- /dev/null
+++ b/unix/boot/vmcached/vmcache.c
@@ -0,0 +1,1566 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include "vmcache.h"
+
+#ifdef sun
+#ifndef MS_SYNC
+#define MS_SYNC 0 /* SunOS */
+#else
+#include <sys/systeminfo.h>
+#endif
+#endif
+
+/*
+ * Virtual Memory Cache Controller
+ *
+ * The VM cache controller manages a region of physical memory in the host
+ * computer. Entire files or file segments are loaded into the cache (into
+ * memory). Space to store such files is made available by the cache
+ * controller by freeing the least recently used file segments. This explicit
+ * freeing of space immediately before it is reused for new data prevents
+ * (in most cases) the kernel reclaim page daemon from running, causing cached
+ * data to remain in memory until freed, and preventing the flow of data
+ * through the cache from causing the system to page heavily and steal pages
+ * away from the region of memory outside the cache.
+ *
+ * vm = vm_initcache (vm|NULL, initstr)
+ * vm_status (vm, outbuf, maxch, flags)
+ * vm_closecache (vm)
+ *
+ * vm_access (vm, fname, mode, flags)
+ * vm_statfile (vm, fname, flags)
+ * vm_setpriority (vm, fname, priority)
+ * vm_cachefile (vm, fname, flags)
+ * vm_uncachefile (vm, fname, flags)
+ * vm_refreshfile (vm, fname, flags)
+ * vm_cachefd (vm, fd, acmode, flags)
+ * vm_uncachefd (vm, fd, flags)
+ * vm_refreshfd (vm, fd, flags)
+ *
+ * vm_reservespace (vm, nbytes)
+ * addr = vm_cacheregion (vm, fname, fd, offset, nbytes, acmode, flags)
+ * vm_uncacheregion (vm, fd, offset, nbytes, flags)
+ * vm_refreshregion (vm, fd, offset, nbytes)
+ *
+ * vm_sync (vm, fd, offset, nbytes, flags)
+ * vm_msync (vm, addr, nbytes, flags)
+ *
+ * Before the VM cache is used it should be initialized with vm_initcache.
+ * The string "initstr" may be used to set the size of the cache, enable
+ * or disable it (e.g. for performance tests), and set other options.
+ * A summary of the VMcache configuration and contents can be generated
+ * with vm_status.
+ *
+ * Files or file segments are loaded into the cache with routines such as
+ * vm_cachefile and vm_cacheregion. Normally, cached files or file segments
+ * are reused on a least-recently-used basis. A file can be locked in the
+ * cache by setting the VM_LOCKFILE flag when the file is cached. This is
+ * automatic for vm_cacheregion since the address at which the file is
+ * mapped is returned to the caller and hence the file is assumed to be in
+ * use. When a file or region which is locked in the cache is no longer
+ * needed one of the "uncache" routines should be called to make the space
+ * used by the cached file data available for reuse. Note that "uncaching"
+ * a file or file segment does not immediately remove the data from the
+ * cache. Any "uncached" data normally remains in the cache until the
+ * space it uses is needed to load other data.
+ *
+ * VMcache is a library which is compiled into a process. This can be
+ * incorporated into a server process to manage the VM cache for a
+ * group of cooperating processes running on the same computer. The
+ * vmcached program (VMcache daemon) is one such program.
+ */
+
+
+#define DEF_CACHESIZE "50%"
+#define DEF_PHYSPAGES 32768
+#define READAHEAD 32768
+#define DEF_PRIORITY 1
+#define DEF_REFBASE 1
+#define DEF_TOCK 600
+#define SZ_HASHTBL 16384
+#define SZ_NAME 64
+#define SZ_VALSTR 64
+#define SZ_PATHNAME 1024
+#define SZ_LINE 4096
+
+/* Solaris and FreeBSD have a madvise() system call. */
+#define HAVE_MADVISE 1
+
+/* Linux provides a madvise call, but it is not implemented and produces
+ * a linker warning message. The madvise call will always fail, but this
+ * is harmless (it just means that the cache fails to control paging and
+ * everything operates "normally").
+ */
+#ifdef linux
+#undef HAVE_MADVISE
+#define MADV_WILLNEED 3 /* will need these pages */
+#define MADV_DONTNEED   4               /* don't need these pages */
+#endif
+
+#define isfile(sp,st) (sp->device == st.st_dev && sp->inode == st.st_ino)
+
+
+/* Segment descriptor. */
+struct segment {
+ struct segment *next;
+ struct segment *prev;
+ struct segment *nexthash;
+ int priority;
+ int userpri;
+ int refcnt;
+ int nrefs;
+ time_t atime;
+ time_t ptime;
+ void *addr;
+ int fd;
+ int acmode;
+ unsigned long inode;
+ unsigned long device;
+ unsigned long offset;
+ unsigned long nbytes;
+ char *fname;
+}; typedef struct segment Segment;
+
+/* Main VMcache descriptor. */
+struct vmcache {
+ Segment *segment_head, *last_mapped, *segment_tail;
+ int cache_initialized;
+ int cache_enabled;
+ int cachelen;
+ unsigned long cacheused;
+ unsigned long cachesize;
+ unsigned long physmem;
+ int lockpages;
+ int pagesize;
+ int defuserpri;
+ int refbase;
+ int tock;
+}; typedef struct vmcache VMcache;
+
+static debug = 0;
+static VMcache vmcache;
+static Segment *hashtbl[SZ_HASHTBL];
+
+static int primes[] = {
+ 101,103,107,109,113,127,131,137,139,
+ 149,151,157,163,167,173,179,181,191,
+};
+
+static vm_readahead();
+static vm_uncache();
+static Segment *vm_locate();
+static int vm_cachepriority();
+static int hashint();
+
+
+/* VM_INITCACHE -- Initialize the VM cache. A pointer to the cache
+ * descriptor is returned as the function value, or NULL if the cache cannot
+ * be initialized. The argument VM may point to an existing cache which
+ * is to be reinitialized, or may be NULL if the cache is being initialized
+ * for the first time.
+ *
+ * The INITSTR argument is used to control all init-time cache options.
+ * INITSTR is a sequence of keyword=value substrings. The recognized options
+ * are as follows:
+ *
+ * cachesize total cache size
+ * lockpages lock pages in memory
+ * enable enable the cache
+ * debug turn on debug messages
+ * defpri default file priority
+ * refbase number of file references before file is cached
+ * tock interval (seconds) at which file references degrade
+ *
+ * Other options may be added in the future.
+ *
+ * Keywords which take a size type value (e.g. cachesize) permit values
+ * such as "x" (size in bytes), "x%" (X percent of physical memory), "xK"
+ * (X kilobytes), or "xM" (X megabytes). The "x%" notation may not work
+ * correctly on all systems as it is not always easy to determine the total
+ * physical memory.
+ *
+ * If the cache is initialized with "enable=no" then all the cache routines
+ * will still be called, but the cache controller will be disabled.
+ */
+void *
+vm_initcache (vm, initstr)
+register VMcache *vm;
+char *initstr;
+{
+ register char *ip, *op;
+ char keyword[SZ_NAME], valstr[SZ_NAME];
+ char cachesize[SZ_VALSTR], *modchar;
+ int percent, enable = 1, lockpages = 0;
+ int defuserpri, refbase, tock;
+ unsigned long physpages;
+
+ if (debug)
+ fprintf (stderr, "vm_initcache (0x%x, \"%s\")\n", vm, initstr);
+
+ strcpy (cachesize, DEF_CACHESIZE);
+ defuserpri = DEF_PRIORITY;
+ refbase = DEF_REFBASE;
+ tock = DEF_TOCK;
+
+ /* Scan the initialization string. Initstr may be NULL or the empty
+ * string, if only the defaults are desired.
+ */
+ for (ip=initstr; ip && *ip; ) {
+ /* Advance to the next keyword=value pair. */
+ while (*ip && (isspace(*ip) || *ip == ','))
+ ip++;
+
+ /* Extract the keyword. */
+ for (op=keyword; *ip && isalnum(*ip); )
+ *op++ = *ip++;
+ *op = '\0';
+
+ while (*ip && (isspace(*ip) || *ip == '='))
+ ip++;
+
+ /* Extract the value string. */
+ for (op=valstr; *ip && (isalnum(*ip) || *ip == '%'); )
+ *op++ = *ip++;
+ *op = '\0';
+
+ if (strcmp (keyword, "cachesize") == 0) {
+ strcpy (cachesize, valstr);
+ } else if (strcmp (keyword, "defpri") == 0) {
+ defuserpri = atoi (valstr);
+ } else if (strcmp (keyword, "refbase") == 0) {
+ refbase = atoi (valstr);
+ } else if (strcmp (keyword, "tock") == 0) {
+ tock = atoi (valstr);
+ } else if (strcmp (keyword, "lockpages") == 0) {
+ int ch = valstr[0];
+ lockpages = (ch == 'y' || ch == 'Y');
+ } else if (strcmp (keyword, "enable") == 0) {
+ int ch = valstr[0];
+ enable = (ch == 'y' || ch == 'Y');
+ } else if (strcmp (keyword, "debug") == 0) {
+ int ch = valstr[0];
+ debug = (ch == 'y' || ch == 'Y');
+ }
+ }
+
+ /* The VM cache needs to be global for a given host, so we just
+ * use a statically allocated cache descriptor here. In the most
+ * general case the whole VMcache interface needs to be split into
+ * a client-server configuration, with the cache server managing
+ * virtual memory for a collection of processes.
+ */
+ if (!vm)
+ vm = &vmcache;
+
+ /* Shut down the old cache if already enabled. */
+ vm_closecache (vm);
+
+ /* There is no good way to guess the total physical memory if this
+ * is not available from the system. But in such a case the user
+ * can just set the value of the cachesize explicitly in the initstr.
+ */
+#ifdef _SC_PHYS_PAGES
+ physpages = sysconf (_SC_PHYS_PAGES);
+ if (debug) {
+ fprintf (stderr, "total physical memory %d (%dm)\n",
+ physpages * getpagesize(),
+ physpages * getpagesize() / (1024 * 1024));
+ }
+#else
+ physpages = DEF_PHYSPAGES;
+#endif
+
+ vm->cachelen = 0;
+ vm->cacheused = 0;
+ vm->cache_enabled = enable;
+ vm->cache_initialized = 1;
+ vm->segment_head = NULL;
+ vm->segment_tail = NULL;
+ vm->pagesize = getpagesize();
+ vm->physmem = physpages * vm->pagesize;
+ vm->lockpages = lockpages;
+ vm->defuserpri = defuserpri;
+ vm->refbase = refbase;
+ vm->tock = tock;
+
+ vm->cachesize = percent = strtol (cachesize, &modchar, 10);
+ if (modchar == cachesize)
+ vm->cachesize = physpages / 2 * vm->pagesize;
+ else if (*modchar == '%')
+ vm->cachesize = physpages * percent / 100 * vm->pagesize;
+ else if (*modchar == 'k' || *modchar == 'K')
+ vm->cachesize *= 1024;
+ else if (*modchar == 'm' || *modchar == 'M')
+ vm->cachesize *= (1024 * 1024);
+ else if (*modchar == 'g' || *modchar == 'G')
+ vm->cachesize *= (1024 * 1024 * 1024);
+
+ return ((void *)vm);
+}
+
+
+/* VM_CLOSECACHE -- Forcibly shutdown a cache if it is already open.
+ * All segments are freed and returned to the system. An attempt is made
+ * to close any open files (this is the only case where the VM cache code
+ * closes files opened by the caller).
+ */
+vm_closecache (vm)
+register VMcache *vm;
+{
+ register Segment *sp;
+ struct stat st;
+
+ if (debug)
+ fprintf (stderr, "vm_closecache (0x%x)\n", vm);
+ if (!vm->cache_initialized)
+ return;
+
+ /* Free successive segments at the head of the cache list until the
+ * list is empty.
+ */
+ while (sp = vm->segment_head) {
+ vm_uncache (vm, sp, VM_DESTROYREGION | VM_CANCELREFCNT);
+
+ /* Since we are closing the cache attempt to forcibly close the
+ * associated file descriptor if it refers to an open file.
+ * Make sure that FD refers to the correct file.
+ */
+ if (fstat (sp->fd, &st) == 0)
+ if (isfile(sp,st))
+ close (sp->fd);
+ }
+
+ vm->cache_initialized = 0;
+}
+
+
+/* VM_ACCESS -- Access the named file and determine if it is in the cache.
+ * Accessing a file via vm_access may cause the file to be loaded into the
+ * cache, depending upon the cache tuning parameters and per-file statistics
+ * such as the number of past references to the file and how recently they
+ * occurred. A return value of -1 indicates that the named file does not
+ * exist or could not be physically accessed. A value of zero indicates
+ * that the file is not cached (is not being managed by the cache). A value
+ * of 1 indicates that the file is being managed by the cache. Accessing
+ * a file updates the reference count and time of last access of the file,
+ * and increases the probability that it will be cached in memory.
+ *
+ * Applications which use VMcache should call vm_access whenever a file is
+ * opened or otherwise accessed so that VMcache can keep statistics on file
+ * accesses and optimize use of the cache. If vm_access returns 1 the client
+ * should use normal i/o to access the file (normal VM-based file i/o or
+ * mmap). If vm_access returns 0 VMcache has determined that the file is
+ * not worth caching in memory, and some form of direct i/o (bypassing
+ * system virtual memory) should be used to access the file.
+ *
+ * The file must exist at the time that vm_access is called. If the file
+ * already exists and has changed size (e.g., data was appended to the file
+ * since the last access) then vm_access will add or remove VM segments to
+ * adjust to the new size of the file. If a new file is being created and
+ * it is desired to reserve VM space for the file, two approaches are
+ * possible: 1) use seek,write to write a byte where the EOF of the new
+ * file will be when all data has been written, so that vm_access will
+ * reserve space for the new file pages; 2) access the short or zero-length
+ * file, explicitly reserve unallocated VM space with vm_reservespace,
+ * and rely upon vm_access to adjust to the new file size the next time
+ * the file is accessed. Option 1) is the best technique for reserving VM
+ * space for large new files which may subsequently be shared by other
+ * applications.
+ */
+vm_access (vm, fname, mode, flags)
+register VMcache *vm;
+char *fname, *mode;
+int flags;
+{
+ register Segment *sp, *xp;
+ Segment *first=NULL, *last=NULL;
+ unsigned long offset, x0, x1, vm_offset, vm_nbytes;
+ int spaceused, map, n, status=0, fd;
+ struct stat st;
+
+ if (debug)
+ fprintf (stderr, "vm_access (0x%x, \"%s\", 0%o)\n",
+ vm, fname, flags);
+ if (!vm->cache_enabled)
+ return (0);
+
+ if ((fd = open (fname, O_RDONLY)) < 0)
+ return (-1);
+ if (fstat (fd, &st) < 0) {
+abort: close (fd);
+ return (-1);
+ }
+
+ /* Align offset,nbytes to map the full file. */
+ x0 = offset = 0;
+ x0 = (x0 - (x0 % vm->pagesize));
+ x1 = offset + st.st_size - 1;
+ x1 = (x1 - (x1 % vm->pagesize)) + vm->pagesize - 1;
+ vm_offset = x0;
+ vm_nbytes = x1 - x0 + 1;
+
+again:
+ /* See if the file is already in the cache list. */
+ first = last = vm_locate (vm, st.st_ino, st.st_dev);
+ for (sp = first; sp; sp = sp->nexthash)
+ if (isfile(sp,st))
+ last = sp;
+
+ /* If the file is already in the cache check whether it has changed
+ * size and adjust the segment descriptors until they agree with the
+ * current file size before we proceed further.
+ */
+ if (last) {
+ if (vm_nbytes < (last->offset + last->nbytes)) {
+ /* If the file has gotten smaller uncache the last segment
+ * and start over. Repeat until the last segment includes EOF.
+ */
+ vm_uncache (vm, last, VM_DESTROYREGION|VM_CANCELREFCNT);
+ goto again;
+
+ } else if (vm_nbytes > (last->offset + last->nbytes)) {
+ /* If the file has gotten larger cache the new data as a new
+ * file segment.
+ */
+ unsigned long offset, nbytes;
+ void *addr;
+
+ offset = last->offset + last->nbytes;
+ nbytes = vm_nbytes - offset;
+ addr = vm_cacheregion (vm, fname, fd,
+ offset, nbytes, last->acmode, VM_DONTMAP);
+ if (!addr)
+ goto abort;
+ goto again;
+ }
+ /* else fall through */
+ } else {
+ /* File is not currently in the cache. Create a new segment
+ * encompassing the entire file, but don't map it in yet.
+ */
+ void *addr;
+ addr = vm_cacheregion (vm, fname, fd,
+ vm_offset, vm_nbytes, VM_READONLY, VM_DONTMAP);
+ if (!addr)
+ goto abort;
+ goto again;
+ }
+
+ /*
+ * If we get here we have one or more file segments in the cache.
+ * The segments may or may not be mapped and they can be anywhere
+ * in the cache list. We need to compute the new priority for the
+ * file, relocate the segments in the cache, determine whether or
+ * not the file will be mapped, and adjust the contents of the
+ * cache accordingly.
+ */
+
+ /* Update the priority of the current file and give all cached file
+ * segments the same reference attributes, since we are treating the
+ * entire file as a whole here.
+ */
+ first = vm_locate (vm, st.st_ino, st.st_dev);
+ first->nrefs++;
+ first->atime = time(0);
+ first->priority = vm_cachepriority (vm, first);
+
+ for (sp = first; sp; sp = sp->nexthash)
+ if (isfile(sp,st)) {
+ sp->nrefs = first->nrefs;
+ sp->atime = first->atime;
+ sp->priority = first->priority;
+ }
+
+ /* Recompute the priorities of all other segments in the head or
+ * "active" area of the cache list.
+ */
+ for (sp = vm->segment_head, n=0; sp; sp = sp->next, n++) {
+ if (!isfile(sp,st))
+ sp->priority = vm_cachepriority (vm, sp);
+ if (sp == vm->last_mapped)
+ break;
+ }
+ for (sp = vm->last_mapped->next; --n >= 0 && sp; sp = sp->next)
+ if (!isfile(sp,st))
+ sp->priority = vm_cachepriority (vm, sp);
+
+ /* Scan the cache list and determine where in priority order to place
+ * the accessed segment. Since manually cached segments are always
+ * placed at the head of the list there is no guarantee that the cache
+ * list will be in strict priority order, but this doesn't matter.
+ */
+ for (xp = vm->segment_head; xp; xp = xp->next)
+ if (first->priority >= xp->priority)
+ break;
+
+ /* Relink each segment of the accessed file in just before the lower
+ * priority segment pointed to by XP. This collects all the file
+ * segments in allocation order within the list.
+ */
+ for (sp=first; sp; sp = sp->nexthash)
+ if (isfile(sp,st)) {
+ /* Unlink segment SP. */
+ if (sp->next)
+ sp->next->prev = sp->prev;
+ else
+ vm->segment_tail = sp->prev;
+
+ if (sp->prev)
+ sp->prev->next = sp->next;
+ else
+ vm->segment_head = sp->next;
+
+ /* Link segment SP in just before XP. */
+ sp->next = xp;
+ if (xp) {
+ sp->prev = xp->prev;
+ sp->prev->next = sp;
+ } else {
+ /* XP is NULL; SP will be the new segment_tail. */
+ sp->prev = vm->segment_tail;
+ vm->segment_tail = sp;
+ }
+
+ /* If XP is at the list head SP replaces it at the head. */
+ if (vm->segment_head == xp)
+ vm->segment_head = sp;
+ }
+
+ /* Scan the new cache list to see if the accessed file is in the
+ * allocated portion of the list.
+ */
+ for (sp = vm->segment_head, spaceused=map=0; sp; sp = sp->next) {
+ if (sp == first) {
+ map = (spaceused + vm_nbytes <= vm->cachesize);
+ break;
+ } else if (sp->addr && !isfile(sp,st)) {
+ spaceused += sp->nbytes;
+ if (spaceused >= vm->cachesize)
+ break;
+ }
+ }
+
+ /* Map the file if it lies above the cutoff point. */
+ if (map) {
+ /* Free sufficient memory pages for the new region. If space
+ * is already allocated to this file don't free it unnecessarily.
+ */
+ for (sp = first, n=vm_nbytes; sp; sp = sp->nexthash)
+ if (isfile(sp,st) && sp->addr)
+ n -= sp->nbytes;
+
+ if (n > 0)
+ vm_reservespace (vm, n);
+
+ /* Map the accessed file segments. */
+ for (sp = first, n=vm_nbytes; sp; sp = sp->nexthash) {
+ if (!isfile(sp,st))
+ continue;
+
+ if (!sp->addr) {
+ void *addr;
+
+ addr = mmap (NULL, (size_t)sp->nbytes,
+ sp->acmode, MAP_SHARED, fd, (off_t)sp->offset);
+ if (!addr) {
+ map = 0;
+ break;
+ }
+
+ /* Lock segment in memory if indicated. */
+ if (vm->lockpages && vm->cache_enabled)
+ mlock (addr, (size_t) sp->nbytes);
+
+ vm->cacheused += sp->nbytes;
+ sp->addr = addr;
+ }
+
+ /* Preload the accessed file segment. */
+ vm_readahead (vm, sp->addr, sp->nbytes);
+ }
+
+ status = 1;
+ }
+
+ close (fd);
+ return (status);
+}
+
+
+/* VM_STATFILE -- Determine if the named file is in the cache.  A return
+ * value of -1 indicates that the named file does not exist or could not
+ * be accessed.  A value of zero indicates that the file is not cached.
+ * A value of 1 or more indicates the number of file segments in the cache.
+ */
+vm_statfile (vm, fname)
+register VMcache *vm;
+char *fname;
+{
+    register Segment *sp;
+    struct stat st;
+    int status=0;
+
+    if (debug)
+	fprintf (stderr, "vm_statfile (0x%x, \"%s\")\n", vm, fname);
+    if (!vm->cache_enabled)
+	return (0);
+
+    if (stat (fname, &st) < 0)
+	return (-1);
+
+    /* vm_locate is declared (vm, device, inode) and segments are inserted
+     * under hashint(dev,ino), so st_dev must be passed first or the lookup
+     * computes the wrong hash bucket and never finds the file.
+     */
+    for (sp = vm_locate(vm,st.st_dev,st.st_ino); sp; sp = sp->nexthash)
+	if (isfile(sp,st))
+	    status++;
+
+    return (status);
+}
+
+
+/* VM_SETPRIORITY -- Set the user-defined priority of a file already in the
+ * cache list from a prior access or cache call.  If the file priority is
+ * zero it will never be cached in memory.  A priority of 1 is neutral;
+ * higher values increase the cache priority of the file.
+ */
+vm_setpriority (vm, fname, priority)
+register VMcache *vm;
+char *fname;
+int priority;
+{
+    register Segment *sp;
+    struct stat st;
+    int status=0;
+
+    /* Negative priorities are clamped to zero (never cache). */
+    if (priority < 0)
+	priority = 0;
+
+    if (debug)
+	fprintf (stderr, "vm_setpriority (0x%x, \"%s\", %d)\n",
+	    vm, fname, priority);
+    if (!vm->cache_enabled)
+	return (0);
+
+    if (stat (fname, &st) < 0)
+	return (-1);
+
+    /* Pass (device, inode) in the declared argument order of vm_locate
+     * so the lookup hashes to the same bucket used at insertion time.
+     */
+    for (sp = vm_locate(vm,st.st_dev,st.st_ino); sp; sp = sp->nexthash)
+	if (isfile(sp,st))
+	    sp->userpri = priority;
+
+    return (status);
+}
+
+
+/* VM_CACHEFILE -- Cache an entire named file in the VM cache.  Returns 0
+ * on success, -1 if the file cannot be opened, stat-ed, or cached.
+ */
+vm_cachefile (vm, fname, flags)
+register VMcache *vm;
+char *fname;
+int flags;
+{
+    struct stat st;
+    int fd;
+
+    if (debug)
+	fprintf (stderr, "vm_cachefile (0x%x, \"%s\", 0%o)\n",
+	    vm, fname, flags);
+    if (!vm->cache_enabled)
+	return (0);
+
+    if ((fd = open (fname, O_RDONLY)) < 0)
+	return (-1);
+    if (fstat (fd, &st) < 0) {
+	close (fd);		/* don't leak the descriptor on error */
+	return (-1);
+    }
+
+    if (!vm_cacheregion (vm, fname, fd, 0L, st.st_size, VM_READONLY, 0)) {
+	close (fd);
+	return (-1);
+    }
+
+    close (fd);
+    /* Unless the caller asked to lock the file, immediately release our
+     * reference so the space is reclaimable on an LRU basis.
+     */
+    if (!(flags & VM_LOCKFILE))
+	vm_uncachefile (vm, fname, 0);
+
+    return (0);
+}
+
+
+/* VM_CACHEFD -- Cache an already open file in the VM cache.  Returns 0 on
+ * success, -1 on error.
+ */
+vm_cachefd (vm, fd, acmode, flags)
+register VMcache *vm;
+int fd;			/* was undeclared; defaulted to implicit int */
+int acmode;
+int flags;
+{
+    struct stat st;
+
+    if (debug)
+	fprintf (stderr, "vm_cachefd (0x%x, %d, 0%o, 0%o)\n",
+	    vm, fd, acmode, flags);
+    if (!vm->cache_enabled)
+	return (0);
+
+    if (fstat (fd, &st) < 0)
+	return (-1);
+
+    if (!vm_cacheregion (vm, NULL, fd, 0L, st.st_size, acmode, flags))
+	return (-1);
+
+    /* Release our reference unless the caller wants the file locked. */
+    if (!(flags & VM_LOCKFILE))
+	vm_uncachefd (vm, fd, 0);
+
+    return (0);
+}
+
+
+/* VM_UNCACHEFILE -- Identify a cached file as ready for reuse.  The file
+ * remains in the cache, but its space is available for reuse on a least
+ * recently used basis.  If it is desired to free the space used by the
+ * cached file immediately the VM_DESTROYREGION flag may be set in FLAGS.
+ */
+vm_uncachefile (vm, fname, flags)
+register VMcache *vm;
+char *fname;
+int flags;
+{
+    register Segment *sp;
+    Segment *next;
+    struct stat st;
+    int status = 0;
+
+    if (debug)
+	fprintf (stderr, "vm_uncachefile (0x%x, \"%s\", 0%o)\n",
+	    vm, fname, flags);
+    if (!vm->cache_enabled)
+	return (0);
+
+    if (stat (fname, &st) < 0)
+	return (-1);
+
+    /* Pass (device, inode) in vm_locate's declared order.  Save the hash
+     * successor before calling vm_uncache: with VM_DESTROYREGION set the
+     * segment descriptor is freed and sp->nexthash would be read from
+     * freed memory.
+     */
+    for (sp = vm_locate(vm,st.st_dev,st.st_ino); sp; sp = next) {
+	next = sp->nexthash;
+	if (!isfile(sp,st))
+	    continue;
+	if (vm_uncache (vm, sp, flags) < 0)
+	    status = -1;
+    }
+
+    return (status);
+}
+
+
+/* VM_UNCACHEFD -- Uncache an entire file identified by its file descriptor.
+ * The file remains in the cache, but its space is available for reuse on a
+ * least recently used basis.  If it is desired to free the space used by
+ * the cached file immediately the VM_DESTROYREGION flag may be set in FLAGS.
+ */
+vm_uncachefd (vm, fd, flags)
+register VMcache *vm;
+int fd;
+int flags;
+{
+    register Segment *sp;
+    Segment *next;
+    struct stat st;
+    int status = 0;
+
+    if (debug)
+	fprintf (stderr, "vm_uncachefd (0x%x, %d, 0%o)\n",
+	    vm, fd, flags);
+    if (!vm->cache_enabled)
+	return (0);
+
+    if (fstat (fd, &st) < 0)
+	return (-1);
+
+    /* Pass (device, inode) in vm_locate's declared order.  Capture the
+     * hash successor first: vm_uncache may free sp (VM_DESTROYREGION),
+     * making sp->nexthash a use-after-free in the loop step.
+     */
+    for (sp = vm_locate(vm,st.st_dev,st.st_ino); sp; sp = next) {
+	next = sp->nexthash;
+	if (!isfile(sp,st))
+	    continue;
+	if (vm_uncache (vm, sp, flags) < 0)
+	    status = -1;
+    }
+
+    return (status);
+}
+
+
+/* VM_REFRESHFILE -- Refresh an entire named file in the VM cache.
+ * If the file is not in the cache nothing is done and -1 is returned.
+ * If the file is cached it is refreshed, i.e., moved to the head of
+ * the cache, reloading any pages not already present in memory.
+ */
+vm_refreshfile (vm, fname, flags)
+register VMcache *vm;
+char *fname;
+int flags;
+{
+    struct stat st;
+    int fd;
+
+    if (debug)
+	fprintf (stderr, "vm_refreshfile (0x%x, \"%s\", 0%o)\n",
+	    vm, fname, flags);
+    if (!vm->cache_enabled)
+	return (0);
+
+    if ((fd = open (fname, O_RDONLY)) < 0)
+	return (-1);
+    if (fstat (fd, &st) < 0) {
+	close (fd);		/* don't leak the descriptor on error */
+	return (-1);
+    }
+
+    /* vm_refreshregion returns an int status (0=ok, -1=not found), not a
+     * pointer, so failure is a negative return -- the old "!" test was
+     * inverted and reported success as failure.
+     */
+    if (vm_refreshregion (vm, fd, 0L, st.st_size) < 0) {
+	close (fd);
+	return (-1);
+    }
+
+    close (fd);
+    return (0);
+}
+
+
+/* VM_REFRESHFD -- Refresh an already open file in the VM cache.  Returns
+ * 0 on success, -1 if the file cannot be stat-ed or is not cached.
+ */
+vm_refreshfd (vm, fd, flags)
+register VMcache *vm;
+int fd;
+int flags;
+{
+    struct stat st;
+
+    if (debug)
+	fprintf (stderr, "vm_refreshfd (0x%x, %d, 0%o)\n",
+	    vm, fd, flags);
+    if (!vm->cache_enabled)
+	return (0);
+
+    if (fstat (fd, &st) < 0)
+	return (-1);
+
+    /* vm_refreshregion returns an int status (0=ok, -1=failure); the old
+     * "!" test inverted the sense of the result.
+     */
+    if (vm_refreshregion (vm, fd, 0L, st.st_size) < 0)
+	return (-1);
+
+    return (0);
+}
+
+
+/* VM_CACHEREGION -- Cache a region or segment of a file.  File segments are
+ * removed from the tail of the LRU cache list until sufficient space is
+ * available for the new segment.  The new file segment is then mapped and a
+ * request is issued to asynchronously read in the file data.  The virtual
+ * memory address of the cached and mapped region is returned.
+ *
+ * File segments may be redundantly cached in which case the existing
+ * mapping is refreshed and the segment is moved to the head of the cache.
+ * Each cache operation increments the reference count of the region and
+ * a matching uncache is required to eventually return the reference count
+ * to zero allowing the space to be reused.  vm_refreshregion can be called
+ * instead of cacheregion if all that is desired is to refresh the mapping
+ * and move the cached region to the head of the cache.  A single file may
+ * be cached as multiple segments but the segments must be page aligned
+ * and must not overlap.  The virtual memory addresses of independent segments
+ * may not be contiguous in virtual memory even though the corresponding
+ * file regions are.  If a new segment overlaps an existing segment it must
+ * fall within the existing segment as the size of a segment cannot be changed
+ * once it is created.  If a file is expected to grow in size after it is
+ * cached, the size of the cached region must be at least as large as the
+ * expected size of the file.
+ *
+ * vm_cacheregion can (should) be used instead of MMAP to map files into
+ * memory, if the files will be managed by the VM cache controller.  Otherwise
+ * the same file may be mapped twice by the same process, which may use
+ * extra virtual memory.  Only files can be mapped using vm_cacheregion, and
+ * all mappings are for shared data.
+ *
+ * If the cache is disabled vm_cacheregion will still map file segments into
+ * memory, and vm_uncacheregion will unmap them when the reference count goes
+ * to zero (regardless of whether the VM_DESTROYREGION flag is set if the
+ * cache is disabled).
+ *
+ * If write access to a segment is desired the file referenced by FD must
+ * have already been opened with write permission.
+ */
+void *
+vm_cacheregion (vm, fname, fd, offset, nbytes, acmode, flags)
+register VMcache *vm;
+char *fname;
+int fd;
+unsigned long offset;
+unsigned long nbytes;
+int acmode, flags;
+{
+    register Segment *sp, *xp;
+    unsigned long x0, x1, vm_offset, vm_nbytes;
+    struct stat st;
+    int mode;
+    void *addr;
+
+    if (debug)
+	fprintf (stderr,
+	    "vm_cacheregion (0x%x, \"%s\", %d, %d, %d, 0%o, 0%o)\n",
+	    vm, fname, fd, offset, nbytes, acmode, flags);
+    if (fstat (fd, &st) < 0)
+	return (NULL);
+
+    /* Align offset,nbytes to fill the referenced memory pages.
+     */
+    x0 = offset;
+    x0 = (x0 - (x0 % vm->pagesize));
+
+    x1 = offset + nbytes - 1;
+    x1 = (x1 - (x1 % vm->pagesize)) + vm->pagesize - 1;
+
+    vm_offset = x0;
+    vm_nbytes = x1 - x0 + 1;
+
+    /* Is this a reference to an already cached segment?  vm_locate is
+     * declared (vm, device, inode); pass st_dev first so the lookup hashes
+     * to the same bucket used at insertion time below.
+     */
+    for (sp = vm_locate(vm,st.st_dev,st.st_ino); sp; sp = sp->nexthash) {
+	if (!isfile(sp,st))
+	    continue;
+
+	if (x0 >= sp->offset && x0 < (sp->offset + sp->nbytes)) {
+	    if (x1 >= sp->offset && x1 < (sp->offset + sp->nbytes)) {
+		/* New segment lies entirely within an existing one.  ADDR
+		 * must be set here; it was previously left uninitialized
+		 * on this path and then used at the refresh exit.
+		 */
+		vm_offset = sp->offset;
+		vm_nbytes = sp->nbytes;
+		addr = sp->addr;
+		goto refresh;
+	    } else {
+		/* New segment extends an existing one; not supported. */
+		return (NULL);
+	    }
+	}
+    }
+
+    mode = PROT_READ;
+    if (acmode == VM_READWRITE)
+	mode |= PROT_WRITE;
+
+    if (flags & VM_DONTMAP)
+	addr = NULL;
+    else {
+	/* Free sufficient memory pages for the new region. */
+	vm_reservespace (vm, vm_nbytes);
+
+	/* Map the new segment, reusing the VM pages freed above.  mmap
+	 * returns MAP_FAILED, not NULL, on failure.
+	 */
+	addr = mmap (NULL,
+	    (size_t)vm_nbytes, mode, MAP_SHARED, fd, (off_t)vm_offset);
+	if (addr == MAP_FAILED)
+	    return (NULL);
+
+	/* Lock segment in memory if indicated. */
+	if (vm->lockpages && vm->cache_enabled)
+	    mlock (addr, (size_t) vm_nbytes);
+
+	vm->cacheused += vm_nbytes;
+    }
+
+    /* Get a segment descriptor for the new segment. */
+    if (!(sp = (Segment *) calloc (1, sizeof(Segment)))) {
+	if (addr)
+	    munmap (addr, vm_nbytes);
+	return (NULL);
+    }
+
+    vm->cachelen++;
+    sp->fd = fd;
+    sp->acmode = acmode;
+    sp->inode = st.st_ino;
+    sp->device = st.st_dev;
+    sp->offset = vm_offset;
+    sp->nbytes = vm_nbytes;
+    sp->addr = addr;
+    sp->ptime = time(0);
+    sp->userpri = vm->defuserpri;
+    if (fname) {
+	/* Save the file name for status reporting; tolerate malloc
+	 * failure by leaving fname NULL rather than crashing in strcpy.
+	 */
+	sp->fname = (char *) malloc (strlen(fname)+1);
+	if (sp->fname)
+	    strcpy (sp->fname, fname);
+    }
+
+    /* Set up the new segment at the head of the cache. */
+    sp->next = vm->segment_head;
+    sp->prev = NULL;
+    if (vm->segment_head)
+	vm->segment_head->prev = sp;
+    vm->segment_head = sp;
+
+    /* If there is nothing at the tail of the cache yet this element
+     * becomes the tail of the cache list.
+     */
+    if (!vm->segment_tail)
+	vm->segment_tail = sp;
+    if (!vm->last_mapped)
+	vm->last_mapped = sp;
+
+    /* Add the segment to the global file hash table.
+     */
+    if ((xp = vm_locate(vm,st.st_dev,st.st_ino))) {
+	/* The file is already in the hash table.  Add the new segment
+	 * to the tail of the file segment list.
+	 */
+	while (xp->nexthash)
+	    xp = xp->nexthash;
+	xp->nexthash = sp;
+
+    } else {
+	/* Add initial file segment to hash table. */
+	int hashval;
+
+	hashval = hashint (SZ_HASHTBL, (int)st.st_dev, (int)st.st_ino);
+	if ((xp = hashtbl[hashval])) {
+	    while (xp->nexthash)
+		xp = xp->nexthash;
+	    xp->nexthash = sp;
+	} else
+	    hashtbl[hashval] = sp;
+    }
+
+refresh:
+    /* Move a new or existing segment to the head of the cache and
+     * increment the reference count.  Refresh the segment pages if
+     * indicated.
+     */
+    if (vm->segment_head != sp) {
+	/* Unlink the list element. */
+	if (sp->next)
+	    sp->next->prev = sp->prev;
+	if (sp->prev)
+	    sp->prev->next = sp->next;
+
+	/* Link current segment at head of cache. */
+	sp->next = vm->segment_head;
+	sp->prev = NULL;
+	if (vm->segment_head)
+	    vm->segment_head->prev = sp;
+	vm->segment_head = sp;
+
+	if (!vm->segment_tail)
+	    vm->segment_tail = sp;
+    }
+
+    /* Preload the referenced segment if indicated and actually mapped. */
+    if (vm->cache_enabled && !(flags & VM_DONTMAP) && addr)
+	vm_readahead (vm, addr, vm_nbytes);
+
+    sp->refcnt++;
+    sp->nrefs++;
+    sp->atime = time(0);
+    sp->priority = vm_cachepriority (vm, sp);
+
+    return ((void *)((char *)addr + (offset - vm_offset)));
+}
+
+
+/* VM_UNCACHEREGION -- Called after a vm_cacheregion to indicate that the
+ * cached region is available for reuse.  For every call to vm_cacheregion
+ * there must be a corresponding call to vm_uncacheregion before the space
+ * used by the region can be reused.  Uncaching a region does not immediately
+ * free the space used by the region, it merely decrements a reference
+ * count so that the region can later be freed and reused if its space is
+ * needed.  The region remains in the cache and can be immediately reclaimed
+ * by a subsequent vm_cacheregion.  If it is known that the space will not
+ * be reused, it can be freed immediately by setting the VM_DESTROYREGION
+ * flag in FLAGS.
+ */
+vm_uncacheregion (vm, fd, offset, nbytes, flags)
+register VMcache *vm;
+int fd;
+unsigned long offset;
+unsigned long nbytes;
+int flags;
+{
+    register Segment *sp;
+    unsigned long x0, x1, vm_offset, vm_nbytes;
+    struct stat st;
+
+    if (debug)
+	fprintf (stderr, "vm_uncacheregion (0x%x, %d, %d, %d, 0%o)\n",
+	    vm, fd, offset, nbytes, flags);
+
+    /* Map offset,nbytes to a range of memory pages.
+     */
+    x0 = offset;
+    x0 = (x0 - (x0 % vm->pagesize));
+
+    x1 = offset + nbytes - 1;
+    x1 = (x1 - (x1 % vm->pagesize)) + vm->pagesize - 1;
+
+    vm_offset = x0;
+    vm_nbytes = x1 - x0 + 1;
+
+    if (fstat (fd, &st) < 0)
+	return (-1);
+
+    /* Locate the referenced segment.  Pass (device, inode) in vm_locate's
+     * declared order so the correct hash bucket is searched.
+     */
+    for (sp = vm_locate(vm,st.st_dev,st.st_ino); sp; sp = sp->nexthash)
+	if (isfile(sp,st) && (sp->offset == vm_offset))
+	    break;
+    if (!sp)
+	return (-1);			/* not found */
+
+    return (vm_uncache (vm, sp, flags));
+}
+
+
+/* VM_REFRESHREGION -- Refresh an already cached file region.  The region is
+ * moved to the head of the cache and preloading of any non-memory resident
+ * pages is initiated.  Returns 0 on success, -1 if the region is not found.
+ */
+vm_refreshregion (vm, fd, offset, nbytes)
+register VMcache *vm;
+int fd;
+unsigned long offset;
+unsigned long nbytes;
+{
+    register Segment *sp;
+    unsigned long x0, x1, vm_offset, vm_nbytes;
+    struct stat st;
+
+    if (debug)
+	fprintf (stderr, "vm_refreshregion (0x%x, %d, %d, %d)\n",
+	    vm, fd, offset, nbytes);
+
+    if (!vm->cache_enabled)
+	return (0);
+
+    /* Map offset,nbytes to a range of memory pages.
+     */
+    x0 = offset;
+    x0 = (x0 - (x0 % vm->pagesize));
+
+    x1 = offset + nbytes - 1;
+    x1 = (x1 - (x1 % vm->pagesize)) + vm->pagesize - 1;
+
+    vm_offset = x0;
+    vm_nbytes = x1 - x0 + 1;
+
+    if (fstat (fd, &st) < 0)
+	return (-1);
+
+    /* Locate the referenced segment.  Pass (device, inode) in vm_locate's
+     * declared order so the correct hash bucket is searched.
+     */
+    for (sp = vm_locate(vm,st.st_dev,st.st_ino); sp; sp = sp->nexthash)
+	if (isfile(sp,st) && (sp->offset == vm_offset))
+	    break;
+    if (!sp)
+	return (-1);			/* not found */
+
+    /* Relink the segment at the head of the cache.
+     */
+    if (vm->last_mapped == sp && sp->prev)
+	vm->last_mapped = sp->prev;
+
+    if (vm->segment_head != sp) {
+	/* Unlink the list element. */
+	if (sp->next)
+	    sp->next->prev = sp->prev;
+	if (sp->prev)
+	    sp->prev->next = sp->next;
+
+	/* Link current segment at head of cache. */
+	sp->next = vm->segment_head;
+	sp->prev = NULL;
+	if (vm->segment_head)
+	    vm->segment_head->prev = sp;
+	vm->segment_head = sp;
+    }
+
+    sp->nrefs++;
+    sp->atime = time(0);
+    sp->priority = vm_cachepriority (vm, sp);
+
+    /* Preload any missing pages from the referenced segment.  The old
+     * code passed an uninitialized local pointer to madvise; the mapped
+     * address of the located segment is what must be preloaded, and only
+     * if the segment is actually mapped.
+     */
+    if (sp->addr)
+	madvise (sp->addr, sp->nbytes, MADV_WILLNEED);
+
+    return (0);
+}
+
+
+/* VM_UNCACHE -- Internal routine to free a cache segment.  Decrements the
+ * segment reference count; if the count reaches zero and VM_DESTROYREGION
+ * is set (or the cache is disabled) the mapping is destroyed and the
+ * segment descriptor is unlinked and freed.  Returns 0 on success, -1 if
+ * the unmap fails.
+ */
+static
+vm_uncache (vm, sp, flags)
+register VMcache *vm;
+register Segment *sp;
+int flags;
+{
+    register Segment *xp;
+    Segment *last;
+    int hashval, status=0;
+
+    if (debug)
+	fprintf (stderr, "vm_uncache (0x%x, 0x%x, 0%o)\n", vm, sp, flags);
+
+    /* Decrement the reference count.  Setting VM_CANCELREFCNT (as in
+     * closecache) causes any references to be ignored.
+     */
+    if (--sp->refcnt < 0 || (flags & VM_CANCELREFCNT))
+	sp->refcnt = 0;
+
+    /* If the reference count is zero and the VM_DESTROYREGION flag is
+     * set, try to free up the pages immediately, otherwise merely
+     * decrement the reference count so that it can be reused if it is
+     * referenced before the space it uses is reclaimed by another cache
+     * load.
+     */
+    if (!sp->refcnt && ((flags & VM_DESTROYREGION) || !vm->cache_enabled)) {
+	/* Only unmap and adjust the space accounting if the segment is
+	 * actually mapped; segments created with VM_DONTMAP or reclaimed
+	 * by vm_reservespace have a NULL addr, and unconditionally
+	 * decrementing cacheused would corrupt the accounting.
+	 */
+	if (sp->addr) {
+	    if (vm->cache_enabled)
+		madvise (sp->addr, sp->nbytes, MADV_DONTNEED);
+	    if (munmap (sp->addr, sp->nbytes) < 0)
+		status = -1;
+	    vm->cacheused -= sp->nbytes;
+	}
+
+	/* Remove the segment from the file hash table.  Scan from the
+	 * head of the hash bucket (not merely the file's first segment)
+	 * so the true predecessor in the bucket chain -- which may belong
+	 * to a different file -- is always repaired.
+	 */
+	hashval = hashint (SZ_HASHTBL, sp->device, sp->inode);
+
+	for (xp = hashtbl[hashval], last = NULL; xp; last=xp, xp=xp->nexthash)
+	    if (xp == sp) {
+		if (last)
+		    last->nexthash = sp->nexthash;
+		else
+		    hashtbl[hashval] = sp->nexthash;
+		break;
+	    }
+
+	/* Update last_mapped if it points to this segment. */
+	if (vm->last_mapped == sp && sp->prev)
+	    vm->last_mapped = sp->prev;
+
+	/* Unlink and free the segment descriptor. */
+	if (sp->next)
+	    sp->next->prev = sp->prev;
+	if (sp->prev)
+	    sp->prev->next = sp->next;
+	if (vm->segment_head == sp)
+	    vm->segment_head = sp->next;
+	if (vm->segment_tail == sp)
+	    vm->segment_tail = sp->prev;
+
+	if (sp->fname)
+	    free (sp->fname);
+	free ((void *)sp);
+	vm->cachelen--;
+    }
+
+    return (status);
+}
+
+
+/* VM_RESERVESPACE -- Free space in the cache, e.g. to create space to cache
+ * a new file or file segment.  File segments are freed at the tail of the
+ * cache list until the requested space is available.  Only segments which
+ * have a reference count of zero are freed.  We do not actually remove
+ * segments from the cache here, we just free any mapped pages.
+ *
+ * Returns 0 if at least NBYTES of free space is available on exit, else -1.
+ */
+vm_reservespace (vm, nbytes)
+register VMcache *vm;
+unsigned long nbytes;
+{
+    register Segment *sp;
+    unsigned long freespace = vm->cachesize - vm->cacheused;
+    int locked_segment_seen = 0;
+
+    if (debug)
+	fprintf (stderr, "vm_reservespace (0x%x, %d)\n", vm, nbytes);
+
+    /* With the cache disabled there is no space accounting to maintain. */
+    if (!vm->cache_enabled)
+	return (0);
+
+    /* Walk from the last mapped segment toward the list head, reclaiming
+     * mapped, unreferenced segments until enough space is free.
+     * NOTE(review): sp->prev walks toward the high-priority head of the
+     * list; confirm this is the intended reclaim direction.
+     */
+    for (sp = vm->last_mapped; sp; sp = sp->prev) {
+	freespace = vm->cachesize - vm->cacheused;
+	if (freespace > nbytes)
+	    break;
+
+	/* Skip segments that are referenced (in use) or not mapped. */
+	if (sp->refcnt) {
+	    locked_segment_seen++;
+	    continue;
+	} else if (!sp->addr)
+	    continue;
+
+	if (debug)
+	    fprintf (stderr, "vm_reservespace: free %d bytes at 0x%x\n",
+		sp->nbytes, sp->addr);
+
+	/* Release the pages and the mapping; the segment descriptor
+	 * stays in the cache list with addr=NULL.
+	 */
+	madvise (sp->addr, sp->nbytes, MADV_DONTNEED);
+	munmap (sp->addr, sp->nbytes);
+	vm->cacheused -= sp->nbytes;
+	sp->addr = NULL;
+
+	/* Track the boundary of the mapped region of the list, but only
+	 * while no still-referenced segment lies beyond it.
+	 */
+	if (sp == vm->last_mapped && !locked_segment_seen)
+	    vm->last_mapped = sp->prev;
+    }
+
+    return ((freespace >= nbytes) ? 0 : -1);
+}
+
+
+/* VM_STATUS -- Return a description of the status and contents of the VM
+ * cache.  The output is written to the supplied text buffer; at most MAXCH
+ * characters are written (the old code did not bound the summary lines).
+ * The number of characters written is returned.
+ */
+vm_status (vm, outbuf, maxch, flags)
+register VMcache *vm;
+char *outbuf;
+int maxch, flags;
+{
+    register Segment *sp;
+    register char *op = outbuf;
+    char buf[SZ_LINE];
+    int seg, nseg;
+
+    sprintf (buf, "initialized %d\n", vm->cache_initialized);
+    if (op-outbuf+strlen(buf) >= maxch) return (op - outbuf);
+    strcpy (op, buf); op += strlen (buf);
+
+    sprintf (buf, "enabled %d\n", vm->cache_enabled);
+    if (op-outbuf+strlen(buf) >= maxch) return (op - outbuf);
+    strcpy (op, buf); op += strlen (buf);
+
+    sprintf (buf, "lockpages %d\n", vm->lockpages);
+    if (op-outbuf+strlen(buf) >= maxch) return (op - outbuf);
+    strcpy (op, buf); op += strlen (buf);
+
+    sprintf (buf, "physmem %d\n", vm->physmem);
+    if (op-outbuf+strlen(buf) >= maxch) return (op - outbuf);
+    strcpy (op, buf); op += strlen (buf);
+
+    sprintf (buf, "cachesize %d\n", vm->cachesize);
+    if (op-outbuf+strlen(buf) >= maxch) return (op - outbuf);
+    strcpy (op, buf); op += strlen (buf);
+
+    sprintf (buf, "cacheused %d\n", vm->cacheused);
+    if (op-outbuf+strlen(buf) >= maxch) return (op - outbuf);
+    strcpy (op, buf); op += strlen (buf);
+
+    sprintf (buf, "pagesize %d\n", vm->pagesize);
+    if (op-outbuf+strlen(buf) >= maxch) return (op - outbuf);
+    strcpy (op, buf); op += strlen (buf);
+
+    /* Count the segments in the cache list. */
+    for (nseg=0, sp = vm->segment_head; sp; sp = sp->next)
+	nseg++;
+    sprintf (buf, "nsegments %d\n", nseg);
+    if (op-outbuf+strlen(buf) >= maxch) return (op - outbuf);
+    strcpy (op, buf); op += strlen (buf);
+
+    /* One line per segment, stopping when the buffer is full. */
+    for (seg=0, sp = vm->segment_head; sp; sp = sp->next, seg++) {
+	sprintf (buf, "segment %d inode %d device %d ",
+	    seg, sp->inode, sp->device);
+	sprintf (buf+strlen(buf), "offset %d nbytes %d refcnt %d %s\n",
+	    sp->offset, sp->nbytes, sp->refcnt,
+	    sp->fname ? sp->fname : "[done]");
+	if (op-outbuf+strlen(buf) >= maxch)
+	    break;
+	strcpy (op, buf); op += strlen (buf);
+    }
+
+    return (op - outbuf);
+}
+
+
+/* VM_LOCATE -- Internal routine to locate the initial segment of a cached
+ * file given its device and inode.  NULL is returned if the referenced file
+ * has no segments in the cache.
+ *
+ * NOTE: the argument order is (device, inode) and hashint is NOT symmetric
+ * in its arguments, so callers must pass the device first -- the same order
+ * used when segments are inserted into the hash table -- or the lookup
+ * hashes to the wrong bucket.  NOTE(review): several callers in this file
+ * appear to pass (inode, device); verify and make them consistent.
+ */
+static Segment *
+vm_locate (vm, device, inode)
+VMcache *vm;
+register dev_t device;
+register ino_t inode;
+{
+    register Segment *sp;
+    int hashval;
+
+    /* Hash the (device,inode) pair to a bucket, then walk the bucket's
+     * chain for the first segment belonging to this file.
+     */
+    hashval = hashint (SZ_HASHTBL, device, inode);
+    for (sp = hashtbl[hashval]; sp; sp = sp->nexthash)
+	if (sp->device == device && sp->inode == inode)
+	    return (sp);
+
+    return (NULL);
+}
+
+
+/* HASHINT -- Hash a pair of integer values.  An integer hash value in the
+ * range 0 to nthreads-1 is returned.
+ */
+static int
+hashint (nthreads, w1, w2)
+int nthreads;
+register int w1, w2;
+{
+    unsigned int h1, h2;
+
+    /* Use fixed prime-table indices.  The original form indexed the table
+     * with "primes[i++] ^ ... primes[i++]" -- two unsequenced side effects
+     * on i in a single expression, which is undefined behavior in C.
+     */
+    h1 = (((w1 >> 16) * primes[0]) ^ (w1 * primes[1]));
+    h2 = (((w2 >> 16) * primes[2]) ^ (w2 * primes[3]));
+
+    return ((h1 ^ h2) % nthreads);
+}
+
+
+/* VM_CACHEPRIORITY -- Compute the cache priority of a file segment.  Various
+ * heuristics are possible for computing the cache priority of a segment.
+ * The one used here assigns a priority which scales with a user defined
+ * per-file priority, and which is a function of the number of recent
+ * references to the file.  The USERPRI, REFBASE, and TOCK parameters can
+ * be used (possibly in combination with manual cache control commands) to
+ * tune the algorithm for the expected file activity.
+ *
+ * Side effect: the segment's nrefs and ptime fields may be decayed here.
+ */
+static int
+vm_cachepriority (vm, sp)
+register VMcache *vm;
+register Segment *sp;
+{
+    register int priority = 0;
+    time_t curtime = time(NULL);
+
+    /* A user-specified priority of zero overrides. */
+    if (sp->userpri <= 0)
+	return (0);
+
+    /* Compute the cache priority for the segment: reference count above
+     * the baseline, reduced by one for every TOCK seconds of idle time,
+     * clamped at zero, then scaled by the per-file user priority.
+     */
+    priority = (sp->nrefs - vm->refbase) -
+	((curtime - sp->atime) / vm->tock);
+    if (priority < 0)
+	priority = 0;
+    priority *= sp->userpri;
+
+    /* Degrade nrefs every tock seconds if the file is not being
+     * accessed.
+     */
+    if (sp->atime > sp->ptime)
+	sp->ptime = sp->atime;
+    else if ((curtime - sp->ptime) > vm->tock) {
+	sp->nrefs -= ((curtime - sp->ptime) / vm->tock);
+	if (sp->nrefs < 0)
+	    sp->nrefs = 0;
+	sp->ptime = curtime;
+    }
+
+    return (priority);
+}
+
+
+/* VM_SYNC -- Sync (update on disk) any pages of virtual memory mapped to
+ * the given region of the given file.  If nbytes=0, any mapped regions of
+ * the given file are synced.  If the VM_ASYNC flag is set the sync operation
+ * will be performed asynchronously and vm_sync will return immediately,
+ * otherwise vm_sync waits for the synchronization operation to complete.
+ * Returns 0 on success, -1 if any msync fails.
+ */
+vm_sync (vm, fd, offset, nbytes, flags)
+register VMcache *vm;
+int fd;
+unsigned long offset;
+unsigned long nbytes;
+int flags;
+{
+    register Segment *sp;
+    unsigned long x0, x1, vm_offset, vm_nbytes;
+    int syncflag, status = 0;
+    struct stat st;
+
+    if (debug)
+	fprintf (stderr, "vm_sync (0x%x, %d, %d, %d, 0%o)\n",
+	    vm, fd, offset, nbytes, flags);
+    if (!vm->cache_enabled)
+	return (0);
+
+    /* Map offset,nbytes to a range of memory pages.
+     */
+    x0 = offset;
+    x0 = (x0 - (x0 % vm->pagesize));
+
+    x1 = offset + nbytes - 1;
+    x1 = (x1 - (x1 % vm->pagesize)) + vm->pagesize - 1;
+
+    vm_offset = x0;
+    vm_nbytes = x1 - x0 + 1;
+
+#ifdef sun
+#ifdef _SYS_SYSTEMINFO_H
+    /* This is a mess.  The values of MS_SYNC,MS_ASYNC changed between
+     * Solaris 2.6 and 2.7.  This code assumes that the system is
+     * being built on a Solaris 2.7 or greater system, but the wired-in
+     * values below allow the executable to be run on earlier versions.
+     */
+    {
+	char buf[SZ_NAME];		/* e.g. "5.7" */
+
+	sysinfo (SI_RELEASE, buf, SZ_NAME);
+	if (buf[0] >= '5' && buf[2] >= '7')
+	    syncflag = (flags & VM_ASYNC) ? MS_ASYNC : MS_SYNC;
+	else
+	    syncflag = (flags & VM_ASYNC) ? 0x1 : 0x0;
+    }
+#else
+    syncflag = (flags & VM_ASYNC) ? MS_ASYNC : MS_SYNC;
+#endif
+#else
+    syncflag = (flags & VM_ASYNC) ? MS_ASYNC : MS_SYNC;
+#endif
+
+    if (fstat (fd, &st) < 0)
+	return (-1);
+
+    /* Sync each matching segment of the file.  Skip segments which are
+     * not currently mapped (addr NULL): msync on a null address would
+     * fail and falsely report an error.
+     */
+    for (sp = vm->segment_head; sp; sp = sp->next) {
+	if (!isfile(sp,st) || !sp->addr)
+	    continue;
+
+	if (!nbytes || sp->offset == vm_offset)
+	    if (msync (sp->addr, sp->nbytes, syncflag))
+		status = -1;
+    }
+
+    return (status);
+}
+
+
+/* VM_MSYNC -- Sync the given region of virtual memory.  This routine does
+ * not require that the caller know the file to which the memory is mapped.
+ * If the VM_ASYNC flag is set the sync operation will be performed
+ * asynchronously and vm_msync will return immediately, otherwise vm_msync
+ * waits for the synchronization operation to complete.  Returns the msync
+ * status (0 on success, -1 on failure).
+ */
+vm_msync (vm, addr, nbytes, flags)
+register VMcache *vm;
+void *addr;
+unsigned long nbytes;
+int flags;
+{
+    unsigned long addr1, addr2;
+    int syncflag;
+
+    if (debug)
+	fprintf (stderr, "vm_msync (0x%x, 0x%x, %d, 0%o)\n",
+	    vm, addr, nbytes, flags);
+
+    /* Align the given address region to the page boundaries.
+     */
+    addr1 = ((long)addr - ((long)addr % vm->pagesize));
+    addr2 = (long)addr + nbytes - 1;
+    addr2 = (addr2 - (addr2 % vm->pagesize)) + vm->pagesize - 1;
+    syncflag = (flags & VM_ASYNC) ? MS_ASYNC : MS_SYNC;
+
+    return (msync ((void *)addr1, addr2 - addr1 + 1, syncflag));
+}
+
+
+/* VM_READAHEAD -- Internal routine used to request that a segment of file
+ * data be preloaded.
+ */
+static
+vm_readahead (vm, addr, nbytes)
+register VMcache *vm;
+void *addr;
+unsigned long nbytes;
+{
+ register int n, nb;
+ int chunk = READAHEAD * vm->pagesize;
+ unsigned long buf = (unsigned long) addr;
+
+ /* Break large reads into chunks of READAHEAD memory pages. This
+ * increases the chance that file access and computation can overlap
+ * the readahead i/o.
+ */
+ for (n=0; n < nbytes; n += chunk) {
+ nb = nbytes - n;
+ if (nb > chunk)
+ nb = chunk;
+ madvise ((void *)(buf + n), nb, MADV_WILLNEED);
+ }
+}
diff --git a/unix/boot/vmcached/vmcache.h b/unix/boot/vmcached/vmcache.h
new file mode 100644
index 00000000..3304b8dd
--- /dev/null
+++ b/unix/boot/vmcached/vmcache.h
@@ -0,0 +1,19 @@
+/*
+ * VMCACHE.H -- Public definitions for the VMcache interface.
+ */
+
+#define DEF_VMSOCK	8677		/* default daemon socket */
+#define ENV_VMSOCK	"VMPORT"	/* environment override for socket */
+
+#define VM_READONLY	0001		/* segment access mode: read-only */
+#define VM_READWRITE	0002		/* segment access mode: read-write */
+#define VM_WRITEONLY	0004		/* segment access mode: write-only */
+#define VM_ASYNC	0010		/* asynchronous sync (msync) */
+#define VM_SYNC		0020		/* synchronous sync (msync) */
+#define VM_LOCKFILE	0040		/* retain reference after caching */
+#define VM_DESTROYREGION 0100		/* free region space immediately */
+#define VM_CANCELREFCNT	0200		/* ignore outstanding references */
+#define VM_DONTMAP	0400		/* create segment but don't mmap it */
+
+void *vm_initcache();			/* open/initialize the cache */
+void *vm_cacheregion();			/* cache a file region; returns addr */
diff --git a/unix/boot/vmcached/vmcached.c b/unix/boot/vmcached/vmcached.c
new file mode 100644
index 00000000..5acccdea
--- /dev/null
+++ b/unix/boot/vmcached/vmcached.c
@@ -0,0 +1,568 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <ctype.h>
+#include "vmcache.h"
+
+#define NOKNET
+#define import_spp
+#define import_knames
+#include <iraf.h>
+
+/*
+ * VMCACHED -- VMcache daemon.
+ *
+ * The VMcache daemon controls a virtual memory cache for optimizing file
+ * storage in virtual memory on a single host computer. Clients can connect
+ * to the daemon to request that files be cached or uncached, query whether
+ * a file is cached, modify cache parameters, or query the status of the
+ * cache.
+ */
+
#define MAX_CLIENTS 256		/* max simultaneous client connections */
#define MAX_ARGS 32		/* max tokens in one client command */
#define SZ_STATBUF 8192		/* buffer for cache status text */
#define SZ_CMDBUF 8192		/* buffer for one client command block */
#define SZ_NAME 32		/* max length of a client name */
#define DEF_CACHESIZE "50%"	/* default cache size (presumably % of memory -- TODO confirm) */
#define DEF_PHYSPAGES 32768	/* default physical page count -- TODO confirm use */
#define DEF_PRIORITY 1		/* default "defpri" cache parameter */
#define DEF_REFBASE 1		/* default "refbase" cache parameter */
#define DEF_TOCK 600		/* default "tock" cache parameter (units not shown here) */


/* Client connection. */
struct client {
	int fd;			/* client socket; 0 means the slot is free */
	FILE *out;		/* buffered output stream opened on fd */
	char name[SZ_NAME+1];	/* client name, sent as data at connect time */
}; typedef struct client Client;

Client client[MAX_CLIENTS];	/* client slot table */
int nclients;			/* number of currently connected clients */
int maxclients;			/* one past the highest slot index in use */
int debug;			/* nonzero: trace activity on stderr */
int running;			/* main server loop runs while nonzero */
extern char *getenv();
void *vm;			/* opaque VMcache descriptor from vm_initcache */
+
+
/* MAIN -- VMCACHED main program.
 *
 * Parses the command line, initializes the VM cache, opens the server
 * socket via the IRAF kernel driver (ZOPNND), then enters a select()
 * loop accepting new client connections and executing client commands
 * until a "quit" directive clears `running'.
 */
main (argc, argv)
int argc;
char **argv;
{
	char *argp, *op, *cachesize;
	int socket, lockpages, defpri, refbase, tock;
	/* NOTE(review): this outer `fd' is never used; the accept block
	 * below declares its own `fd' which shadows it. */
	int c_argc, fd, status, acmode, server, i;
	char *c_argv[MAX_ARGS];
	char initstr[SZ_FNAME];
	char osfn[SZ_FNAME];
	fd_set readfds;

	/* Built-in defaults, possibly overridden below. */
	cachesize = DEF_CACHESIZE;
	socket = DEF_VMSOCK;
	defpri = DEF_PRIORITY;
	refbase = DEF_REFBASE;
	tock = DEF_TOCK;
	lockpages = 0;

	/* The socket to be used can be set in the environment. */
	if (argp = getenv (ENV_VMSOCK))
	    socket = atoi (argp);

	/* Parse argument list.
	 * NOTE(review): the comma operator discards `i < argc'; only
	 * `argp = argv[i]' is tested, so termination relies on the NULL
	 * terminator of argv[].  Options that take a value also read
	 * argv[++i] without checking that the value is present.
	 */
	for (i=1; i < argc, argp = argv[i]; i++) {
	    if (argname (argp, "-k", "-port")) {
		argp = (argv[++i]);
		socket = atoi (argp);
	    } else if (argname (argp, "-s", "-cachesize")) {
		argp = (argv[++i]);
		cachesize = argp;
	    } else if (argname (argp, "-p", "-defpri")) {
		argp = (argv[++i]);
		defpri = atoi (argp);
	    } else if (argname (argp, "-b", "-refbase")) {
		argp = (argv[++i]);
		refbase = atoi (argp);
	    } else if (argname (argp, "-t", "-tock")) {
		argp = (argv[++i]);
		tock = atoi (argp);
	    } else if (argname (argp, "-l", "-lockpages")) {
		lockpages++;
	    } else if (argname (argp, "-d", "-debug")) {
		debug++;
	    } else
		fprintf (stderr, "vmcached: unknown argument `%s'\n", argp);
	}

	/* Construct the initstr for VMcache. */
	op = initstr;
	sprintf (op, "cachesize=%s,defpri=%d,refbase=%d,tock=%d",
	    cachesize, defpri, refbase, tock);
	if (lockpages) {
	    op = initstr + strlen(initstr);
	    strcat (op, ",lockpages");
	}
	if (debug) {
	    op = initstr + strlen(initstr);
	    strcat (op, ",debug");
	}

	if (debug)
	    fprintf (stderr, "vmcached: init vmcache `%s'\n", initstr);

	/* Initialize the VM cache.
	 * NOTE(review): the message below prints `osfn', which is still
	 * uninitialized at this point, and its text refers to a socket;
	 * it should report a vm_initcache failure instead.
	 */
	if (!(vm = vm_initcache (NULL, initstr))) {
	    fprintf (stderr, "vmcached: failed to open socket `%s'\n", osfn);
	    exit (1);
	}

	/* Open the server port for incoming connections.
	 */
	sprintf (osfn, "inet:%d::nonblock", socket);
	acmode = NEW_FILE;
	if (debug)
	    fprintf (stderr, "vmcached: open server socket `%s'\n", osfn);

	ZOPNND (osfn, &acmode, &server);
	if (server == XERR) {
	    fprintf (stderr, "vmcached: failed to open socket `%s'\n", osfn);
	    vm_closecache (vm);
	    exit (2);
	}

	if (debug)
	    fprintf (stderr, "vmcached: enter main server loop:\n");

	/* Loop indefinitely waiting for new connections or client
	 * requests.
	 * NOTE(review): select()'s first argument should be the highest
	 * descriptor value plus one; MAX_CLIENTS is only correct while
	 * every fd in the set happens to be below it.
	 */
	for (running=1; running; ) {
	    FD_ZERO (&readfds);
	    FD_SET (server, &readfds);
	    for (i=0; i < maxclients; i++)
		if (client[i].fd)
		    FD_SET (client[i].fd, &readfds);
	    if (select (MAX_CLIENTS, &readfds, NULL, NULL, NULL) <= 0)
		break;

	    /* Check for a new client connection. */
	    if (FD_ISSET (server, &readfds)) {
		char buf[SZ_CMDBUF];
		FILE *fdopen();
		int fd, n;

		if (debug)
		    fprintf (stderr, "vmcached: open new client connection: ");

		/* Accept the connection. */
		sprintf (osfn, "sock:%d", server);
		acmode = NEW_FILE;
		ZOPNND (osfn, &acmode, &fd);
		if (fd == XERR)
		    exit (1);

		/* Find a free client slot (fd == 0 means free). */
		for (i=0; i < MAX_CLIENTS; i++)
		    if (!client[i].fd)
			break;
		if (i >= MAX_CLIENTS) {
		    fprintf (stderr, "vmcached: too many clients\n");
		    ZCLSND (&fd, &status);
		    continue;
		}

		/* The client name is passed as data in an open. */
		if ((n = read (fd, buf, SZ_CMDBUF)) > 0) {
		    strncpy (client[i].name, buf, SZ_NAME);
		    client[i].name[n < SZ_NAME ? n : SZ_NAME] = '\0';
		}

		if (debug)
		    fprintf (stderr, "fd=%d (%s)\n", fd, client[i].name);

		client[i].fd = fd;
		client[i].out = fdopen (fd, "w");
		nclients++;
		if (i >= maxclients)
		    maxclients = i + 1;

		/* Send an acknowledge back to the client. */
		c_argc = 1; c_argv[0] = client[i].name;
		putstati (client[i].out, c_argc, c_argv, 0);
	    }

	    /* Check for command input from clients.  Any command data
	     * must be sent as a complete command block.  The block must
	     * be syntactically complete, but may contain multiple
	     * concatenated commands.  If a command references any data
	     * not passed as part of the command, the data can be read
	     * from the client input stream during execution of the command.
	     */
	    for (i=0; i < MAX_CLIENTS; i++) {
		Client *cx = &client[i];
		if (!cx->fd)
		    continue;

		if (FD_ISSET (cx->fd, &readfds)) {
		    int status, buflen;
		    char buf[SZ_CMDBUF];
		    char *ip, *itop;

		    if (debug) fprintf (stderr,
			"vmcached: client input on fd=%d: ", cx->fd);

		    if ((buflen = read (cx->fd, buf, SZ_CMDBUF)) <= 0) {
			if (debug)
			    fputs ("[EOF (disconnected)]\n", stderr);
			goto disconnect;
		    }
		    /* NOTE(review): if buflen == SZ_CMDBUF this writes one
		     * byte past the end of buf. */
		    if (debug) {
			buf[buflen] = '\0';
			fputs (buf, stderr);
		    }

		    ip = buf;
		    itop = buf + buflen;

		    /* Execute each command in the block.  A positive
		     * return from execute() -- or an EOF above, which
		     * jumps into this loop body -- tears down the
		     * connection and frees the slot.
		     */
		    while (getcmd (&ip, itop, &c_argc, c_argv) > 0)
			if (execute (cx, c_argc, c_argv) > 0) {
disconnect:		    fclose (cx->out);
			    ZCLSND (&cx->fd, &status);
			    cx->fd = 0;
			    cx->out = NULL;
			    nclients--;
			    if (maxclients == i+1)
				maxclients--;
			    break;
			}

		    if (cx->out)
			fflush (cx->out);
		}
	    }
	}

	if (debug)
	    fprintf (stderr, "vmcached: shutdown\n");

	/* Close all client connections. */
	for (i=0; i < maxclients; i++) {
	    Client *cx = &client[i];
	    if (cx->fd) {
		fclose (cx->out);
		close (cx->fd);
		cx->fd = 0;
	    }
	}

	ZCLSND (&server, &status);
	vm_closecache (vm);
	exit (0);
}
+
+
+/* EXECUTE -- Execute a vmcached directive.
+ *
+ * Directives are simple newline or semicolon delimited commands, with the
+ * arguments delimited by whitespace or quotes, e.g., :
+ *
+ * access /d1/iraf/h1904b.fits rw
+ *
+ * Multiple commands can be concatenated (with command delimiters) and sent
+ * as a batch if desired. They will be executed in sequence. Most commands
+ * result in a response to the client. These have the form
+ *
+ * <status> '=' <command> <args>
+ *
+ * for example,
+ *
+ * 1 = access /d1/iraf/h1904b.fits rw
+ *
+ * This form makes the status value easy to parse for simple commands.
+ * The command is echoed so that the status value can be matched to the
+ * command it is for, e.g., if multiple commands were issued.
+ */
+execute (cx, argc, argv)
+Client *cx;
+int argc;
+char *argv[];
+{
+ char *cmd = argv[0];
+ int execstat = 0;
+ int i, status = 0;
+
+ if (!cmd)
+ return (-1);
+
+ if (debug) {
+ fprintf (stderr, "vmcached: execute \"%s (", cmd);
+ for (i=1; i < argc; i++) {
+ if (i > 1)
+ fprintf (stderr, ", ");
+ fprintf (stderr, "%s", argv[i]);
+ }
+ fprintf (stderr, ")\"\n");
+ }
+
+ if (strcmp (cmd, "bye") == 0) {
+ /* Usage: bye
+ * Close a client connection.
+ */
+ execstat = 1;
+
+ } else if (strcmp (cmd, "quit") == 0) {
+ /* Usage: quit
+ * Shutdown vmcached and exit.
+ */
+ running = 0;
+
+ } else if (strcmp (cmd, "access") == 0) {
+ /* Usage: access <fname> [<mode>]
+ *
+ * Determine whether the named file should be accessed via the
+ * VMcache (via virtual memory / normal i/o) or via direct i/o,
+ * bypassing VM. In the simplest scenario we just check whether
+ * the named file is already in the cache, perhaps loaded via
+ * the cache directive by a control process. More complex
+ * strategies are possible, e.g., every access could be set up
+ * to automatically cache the referenced file; caching could be
+ * decided on a per-process basic depending upon access history,
+ * etc. A client about to access a file should issue an access
+ * directive to the cache to determine whether or not to use VM
+ * (e.g., normal file i/o) to access the file.
+ */
+ char *fname = argv[1];
+ char *mode = (argc > 2) ? argv[2] : "r";
+
+ if (!fname)
+ status = -1;
+ else
+ status = vm_access (vm, fname, mode, 0);
+ putstati (cx->out, argc, argv, status);
+
+ } else if (strcmp (cmd, "cache") == 0) {
+ /* Usage: cache <fname>
+ *
+ * Cache the named file. The file is asynchronously loaded
+ * into the VM cache.
+ */
+ char *fname = argv[1];
+
+ if (!fname)
+ status = -1;
+ else
+ status = vm_cachefile (vm, fname, 0);
+ putstati (cx->out, argc, argv, status);
+
+ } else if (strcmp (cmd, "uncache") == 0) {
+ /* Usage: uncache <fname>
+ *
+ * If the named file is present in the cache the space it is
+ * marked as ready for reuse. Any VM space used by the file is
+ * not immediately reused. The actual disk file is not affected.
+ */
+ char *fname = argv[1];
+
+ if (!fname)
+ status = -1;
+ else
+ status = vm_uncachefile (vm, fname, 0);
+ putstati (cx->out, argc, argv, status);
+
+ } else if (strcmp (cmd, "delete") == 0) {
+ /* Usage: delete <fname>
+ *
+ * If the named file is present in the cache it is removed from
+ * the cache, freeing the space to be used for other files. The
+ * actual disk file is not affected.
+ */
+ char *fname = argv[1];
+
+ if (!fname)
+ status = -1;
+ else {
+ status = vm_uncachefile (vm, fname,
+ VM_DESTROYREGION|VM_CANCELREFCNT);
+ }
+ putstati (cx->out, argc, argv, status);
+
+ } else if (strcmp (cmd, "refresh") == 0) {
+ /* Usage: refresh <fname>
+ *
+ * If the named file is present in the cache it is moved to the
+ * head of the cache (most recently referenced), and any missing
+ * file pages are asynchronously loaded from disk.
+ */
+ char *fname = argv[1];
+
+ if (!fname)
+ status = -1;
+ else
+ status = vm_refreshfile (vm, fname, 0);
+ putstati (cx->out, argc, argv, status);
+
+ } else if (strcmp (cmd, "reserve") == 0) {
+ /* Usage: reserve <nbytes>
+ *
+ * The indicated amount of space is made available in the cache.
+ * The space goes on the VM free list, for use to buffer data
+ * without paging out other data.
+ */
+ long nbytes = (argv[1]) ? atol(argv[1]) : 0;
+
+ if (!nbytes)
+ status = -1;
+ else
+ status = vm_reservespace (vm, nbytes);
+ putstati (cx->out, argc, argv, status);
+
+ } else if (strcmp (cmd, "status") == 0) {
+ /* Usage: status
+ *
+ * The status directive is used to query the status and contents
+ * of the VM cache. A description of all parameters and cached
+ * files is returned in text form.
+ */
+ char statbuf[SZ_STATBUF];
+
+ status = vm_status (vm, statbuf, SZ_STATBUF, 0);
+ putstats (cx->out, argc, argv, status);
+ fputs (statbuf, cx->out);
+
+ } else if (strcmp (cmd, "subscribe") == 0) {
+ /* Usage: subscribe */
+ fprintf (cx->out, "%s %d\n", cmd, status);
+
+ } else if (strcmp (cmd, "unsubscribe") == 0) {
+ /* Usage: unsubscribe */
+ fprintf (cx->out, "%s %d\n", cmd, status);
+
+ } else {
+ execstat = status = -1;
+ putstati (cx->out, argc, argv, status);
+ }
+
+ return (execstat);
+}
+
+
+/* PUTSTATI -- Return an integer valued command status to the client.
+ */
+putstati (fp, argc, argv, status)
+FILE *fp;
+int argc;
+char **argv;
+int status;
+{
+ register int i;
+
+ fprintf (fp, "%d = %s", status, argv[0]);
+ for (i=1; i < argc && argv[i]; i++)
+ fprintf (fp, " %s", argv[i]);
+ fprintf (fp, "\n");
+ fflush (fp);
+
+ if (debug)
+ fprintf (stderr, "vmcached: %s -> %d\n", argv[0], status);
+}
+
+
/* PUTSTATS -- Send a string-valued command status back to the client.
 * The reply line has the form "<status> = <command> <args...>", matching
 * the integer form written by putstati.
 * NOTE(review): one caller in this file (the "status" directive) passes
 * an int here rather than a string; that call site should use putstati.
 */
void
putstats (FILE *fp, int argc, char **argv, char *status)
{
	int i;

	fprintf (fp, "%s = %s", status, argv[0]);
	/* Bug fix: the old loop started at i=0, echoing the command name
	 * (argv[0]) twice.  Start at 1, as putstati does.
	 */
	for (i=1; i < argc && argv[i]; i++)
	    fprintf (fp, " %s", argv[i]);
	fprintf (fp, "\n");
	fflush (fp);
}
+
+
/* ARGNAME -- Test whether an argument string matches either of two option
 * names (typically a short and a long form).  A NULL name is skipped.
 * Returns 1 on a match, else 0.
 */
int
argname (char *arg, char *name1, char *name2)
{
	if (name1 != NULL && strcmp (arg, name1) == 0)
	    return (1);
	if (name2 != NULL && strcmp (arg, name2) == 0)
	    return (1);

	return (0);
}
+
+
/* GETCMD -- Read a command from the input command block and parse it into
 * the command name and arguments.  The input pointer is left positioned
 * to the text following the command.  The command name is returned as
 * argv[0]; the token count is both stored in *argc and returned.
 *
 * Parsing is destructive: NUL terminators are written into the caller's
 * buffer and the argv[] pointers point into it, so the buffer must stay
 * alive while argv[] is in use.
 */
getcmd (ipp, itop, argc, argv)
char **ipp;		/* in/out: current position in the command buffer */
char *itop;		/* first location beyond the end of the buffer */
int *argc;		/* out: number of tokens parsed */
char *argv[];		/* out: token vector; unused slots set to NULL */
{
	register char *ip = *ipp;
	register char *argp;
	int i, nargs = 0;

	/* Clear the full vector so unused slots read as NULL. */
	for (i=0; i < MAX_ARGS; i++)
	    argv[i] = NULL;

	/* Skip leading blanks and tabs (but not newlines). */
	while (ip < itop && (*ip == ' ' || *ip == '\t'))
	    ip++;

	/* Get command name and any arguments.  A newline or a semicolon
	 * at a token boundary terminates the command; a semicolon embedded
	 * in an unquoted token is kept as part of that token.
	 */
	while (ip < itop && *ip != '\n' && *ip != ';') {
	    /* Get next argument. */
	    argp = ip;

	    /* Quoted strings may include whitespace.  The quote characters
	     * are omitted from the argument.  (A missing closing quote
	     * consumes the rest of the buffer.)
	     */
	    if (*ip == '\'') {
		for (argp = ++ip; ip < itop && *ip != '\''; )
		    ip++;
	    } else if (*ip == '"') {
		for (argp = ++ip; ip < itop && *ip != '"'; )
		    ip++;
	    } else {
		/* Unquoted token: ends at whitespace.  A backslash
		 * protects the following character from ending the token,
		 * but the backslash itself is retained in the text.
		 */
		while (ip < itop && !isspace(*ip)) {
		    if (*ip == '\\' && ip+1 < itop)
			ip++;
		    ip++;
		}
	    }

	    /* Terminate the token in place.
	     * NOTE(review): if the token ends exactly at itop this writes
	     * the NUL at buf[buflen], one past the data that was read.
	     */
	    *ip++ = '\0';
	    if (argp[0])
		argv[nargs++] = argp;

	    /* Skip forward to next argument. */
	    while (ip < itop && (*ip == ' ' || *ip == '\t'))
		ip++;
	}

	/* Skip forward to next command line. */
	while (ip < itop && (isspace(*ip) || *ip == ';'))
	    ip++;

	*argc = nargs;
	*ipp = ip;

	return (nargs);
}