Diffstat (limited to 'pkg/bench')
-rw-r--r--  pkg/bench/README            |    2
-rw-r--r--  pkg/bench/bench.cl          |   23
-rw-r--r--  pkg/bench/bench.hlp         | 1723
-rw-r--r--  pkg/bench/bench.ms          |  788
-rw-r--r--  pkg/bench/bench_tab.ms      |   98
-rw-r--r--  pkg/bench/fortask.cl        |   15
-rw-r--r--  pkg/bench/mkpkg             |    5
-rw-r--r--  pkg/bench/plots.cl          |   20
-rw-r--r--  pkg/bench/subproc.cl        |   18
-rw-r--r--  pkg/bench/x_bench.x         |  229
-rw-r--r--  pkg/bench/xctest/README     |    2
-rw-r--r--  pkg/bench/xctest/columns.x  |   74
-rw-r--r--  pkg/bench/xctest/lintran.x  |  370
-rw-r--r--  pkg/bench/xctest/mkpkg      |   25
-rw-r--r--  pkg/bench/xctest/table.x    |  111
-rw-r--r--  pkg/bench/xctest/tokens.x   |  140
-rw-r--r--  pkg/bench/xctest/unique.x   |   46
-rw-r--r--  pkg/bench/xctest/words.x    |   44
-rw-r--r--  pkg/bench/xctest/x_lists.x  |   10
19 files changed, 3743 insertions, 0 deletions
diff --git a/pkg/bench/README b/pkg/bench/README new file mode 100644 index 00000000..0e892171 --- /dev/null +++ b/pkg/bench/README @@ -0,0 +1,2 @@ +BENCH -- IRAF benchmarks package. Documented in the bench.hlp file in this +directory. diff --git a/pkg/bench/bench.cl b/pkg/bench/bench.cl new file mode 100644 index 00000000..9a84da27 --- /dev/null +++ b/pkg/bench/bench.cl @@ -0,0 +1,23 @@ +images +plot + +#{ BENCH -- Benchmarks package. + +package bench + +set bench = "pkg$bench/" + +task fortask = "bench$fortask.cl" +task subproc = "bench$subproc.cl" +task plots = "bench$plots.cl" + +task $ptime, + $getpar, + $wipc.bb, + $rrbin, + $rbin, + $wbin, + $rtext, + $wtext = "bench$x_bench.e" + +clbye() diff --git a/pkg/bench/bench.hlp b/pkg/bench/bench.hlp new file mode 100644 index 00000000..3b7a97b9 --- /dev/null +++ b/pkg/bench/bench.hlp @@ -0,0 +1,1723 @@ +.help bench Mar86 "IRAF Performance Tests" +.ce +\fBA Set of Benchmarks for Measuring IRAF System Performance\fR +.ce +Doug Tody +.ce +March 28, 1986 +.ce +(Revised July 1987) + +.nh +Introduction + + This set of benchmarks has been prepared with a number of purposes in mind. +Firstly, the benchmarks may be run after installing IRAF on a new system to +verify that the performance expected for that machine is actually being +achieved. In general, this cannot be taken for granted since the performance +actually achieved on a particular system can be highly dependent upon how the +system is configured and tuned. Secondly, the benchmarks may be run to compare +the performance of different IRAF hosts, or to track the system performance +over a period of time as improvements are made, both to IRAF and to the host +system. Lastly, the benchmarks provide a metric which can be used to tune +the host system. + +All too often, the only benchmarks run on a system are those which test the +execution time of optimized code generated by the host Fortran compiler. +This is primarily a hardware benchmark and secondarily a test of the Fortran +optimizer. An example of this type of test is the famous Linpack benchmark. + +The numerical execution speed test is an important benchmark but it tests only +one of the many factors contributing to the overall performance of the system +as perceived by the user. In interactive use other factors are often more +important, e.g., the time required to spawn or communicate with a subprocess, +the time required to access a file, the response of the system as the number +of users (or processes) increases, and so on. While the quality of optimized +code is a critical factor for cpu intensive batch processing, other factors +are often more important for sophisticated interactive applications. + +The benchmarks described here are designed to test, as fully as possible, +the major factors contributing to the overall performance of the IRAF system +on a particular host. A major factor in the timings of each benchmark is +of course the IRAF system itself, but comparisons of different hosts are +nonetheless possible since the code is virtually identical on all hosts. +The IRAF kernel is coded differently for each host, but the functions +performed by the kernel are identical on each host, and in most cases the +kernel operations are a negligible factor in the final timings. + +The IRAF version number, host operating system and associated version number, +and the host computer hardware configuration are all important in interpreting +the results of the benchmarks, and should always be recorded. 
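For example, on a UNIX host this information might be captured at the head
of a benchmark log with a few shell commands such as the following (a minimal
sketch only; the log file name is arbitrary, the IRAF version string must be
typed in by hand, and the exact commands are host dependent):

.nf
	% hostname > bench.log			# host name
	% cat /etc/motd >> bench.log		# OS version banner
	% echo "IRAF V2.5, 11/750+FPA, 8Mb, RA81" >> bench.log
.fi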
+ +.nh +What is Measured + + Each benchmark measures two quantities, the total cpu time required to +execute the benchmark, and the total (wall) clock time required to execute the +benchmark. If the clock time measurement is to be of any value the benchmarks +must be run on a single user system. Given this "best time" measurement, +it is not difficult to predict the performance to be expected on a loaded +system. + +The total cpu time required to execute a benchmark consists of the "user" time +plus the "system" time. The "user" time is the cpu time spent executing +the instructions comprising the user program. The "system" time is the cpu +time spent in kernel mode executing the system services called by the user +program. When possible we give both measurements, while in some cases only +the user time is given, or only the sum of the user and system times. +If the benchmark involves several concurrent processes no cpu time measurement +may be possible on some systems. The cpu time measurements are therefore +only reliable for the simpler benchmarks. + +The clock time measurement will of course include both the user and system +execution time, plus the time spent waiting for i/o. Any minor system daemon +processes executing while the benchmarks are being run may bias the clock +time measurement slightly, but since these are a constant part of the host +environment it is fair to include them in the timings. Major system daemons +which run infrequently (e.g., the print symbiont in VMS) should invalidate +the benchmark. + +A comparison of the cpu and clock times tells whether the benchmark was cpu +or i/o bound (assuming a single user system). Those benchmarks involving +compiled IRAF tasks do not include the process startup and pagein times +(these are measured by a different benchmark), hence the task should be run +once before running the benchmark to connect the subprocess and page in +the memory used by the task. A good procedure to follow is to run each +benchmark once to start the process, and then repeat the benchmark three times, +averaging the results. If inconsistent results are obtained further iterations +and/or monitoring of the host system are called for until a consistent result +is achieved. + +Many benchmarks depend upon disk performance as well as compute cycles. +For such a benchmark to be a meaningful measure of the i/o bandwidth of the +system it is essential that no other users (or batch jobs) be competing for +disk seeks on the disk used for the test file. There are subtle things to +watch out for in this regard, for example, if the machine is in a VMS cluster +or on a local area network, processes on other nodes may be accessing the +local disk, yet will not show up on a user login or process list on the local +node. It is always desirable to repeat each test several times or on several +different disk devices, to ensure that no outside requests were being serviced +while the benchmark was being run. If the system has disk monitoring utilities +use these to find an idle disk before running any benchmarks which do heavy i/o. + +Beware of disks which are nearly full; the maximum achievable i/o bandwidth +will fall off rapidly as a disk fills up, due to disk fragmentation (the file +must be stored in little pieces scattered all over the physical disk). +Similarly, many systems (VMS, AOS/VS) suffer from disk fragmentation problems +that gradually worsen as a files system ages, requiring that the disk +periodically be backed off onto tape and then restored. 
In some cases, +disk fragmentation can cause the maximum achievable i/o bandwidth to degrade +by an order of magnitude. + +.nh +The Benchmarks + + Instructions are given for running each benchmark, and the operations +performed by each benchmark are briefly described. The system characteristics +measured by the benchmark are briefly discussed. A short mnemonic name is +associated with each benchmark to identify it in the tables given in the +\fIresults\fR section. + +.nh 2 +Host Level Benchmarks + + The benchmarks discussed in this section are run at the host system level. +The examples are given for the UNIX cshell, under the assumption that a host +dependent example is better than none at all. These commands must be +translated by the user to run the benchmarks on a different system. + +.nh 3 +CL Startup/Shutdown [CLSS] + + Go to the CL login directory, mark the time (the method by which this is +done is system dependent), and startup the CL. Enter the "logout" command +while the CL is starting up so that the CL will not be idle (with the clock +running) while the command is being entered. Mark the final cpu and clock +time and compute the difference. + +.nf + % time cl + logout +.fi + +This is a complex benchmark but one which is of obvious importance to the +IRAF user. The benchmark is probably dominated by the cpu time required to +start up the CL, i.e., start up the CL process, initialize the i/o system, +initialize the environment, interpret the CL startup file, interpret the +user LOGIN.CL file, connect and disconnect the x_system.e subprocess, and so on. +Most of the remaining time is the overhead of the host operating system for +the process spawns, page faults, file accesses, and so on. + +.nh 3 +Mkpkg (verify) [MKPKGV] + + Go to the PKG directory and enter the (host system equivalent of the) +following command. The method by which the total cpu and clock times are +computed is system dependent. + +.nf + % cd $iraf/pkg + % time mkpkg -n +.fi + +This benchmark does a "no execute" make-package of the entire PKG suite of +applications and systems packages. This tests primarily the speed with which +the host system can read directories, resolve pathnames, and return directory +information for files. Since the PKG directory tree is continually growing, +this benchmark is only useful for comparing the same version of IRAF run on +different hosts, or the same version of IRAF on the same host at different +times. + +.nh 3 +Mkpkg (compile) [MKPKGC] + + Go to the directory "iraf$pkg/bench/xctest" and enter the (host system +equivalents of the) following commands. The method by which the total cpu +and clock times are computed is system dependent. Only the \fBmkpkg\fR +command should be timed. + +.nf + % cd $iraf/pkg/bench/xctest + % mkpkg clean # delete old library, etc., if present + % time mkpkg + % mkpkg clean # delete newly created binaries +.fi + +This tests the time required to compile and link a small IRAF package. +The timings reflect the time required to preprocess, compile, optimize, +and assemble each module and insert it into the package library, then link +the package executable. The host operating system overhead for the process +spawns, page faults, etc. is also a major factor. + +.nh 2 +IRAF Applications Benchmarks + + The benchmarks discussed in this section are run from within the IRAF +environment, using only standard IRAF applications tasks. 
The cpu and clock +execution times of any (compiled) IRAF task may be measured by prefixing +the task name with a $ when the command is entered, as shown in the examples. +The significance of the cpu time measurement is not precisely defined for +all systems. On a UNIX host, it is the "user" cpu time used by the task. +On a VMS host, there does not appear to be any distinction between the user +and system times (probably because the system services execute in the context +of the calling process), hence the cpu time given probably includes both. + +.nh 3 +Mkhelpdb [MKHDB] + + The \fBmkhelpdb\fR task is in the \fBsoftools\fR package. The function of +the task is to scan the tree of ".hd" help-directory files and compile the +binary help database. + +.nf + cl> softools + cl> $mkhelpdb +.fi + +This benchmark tests primarily the global optimization of the Fortran +compiler, since the code being executed is quite complex. It also tests the +speed with which text files can be opened and read. Since the size of the +help database varies with each version of IRAF, this benchmark is only useful +for comparing the same version of IRAF run on different hosts, or the same +version run on a single host at different times. + +.nh 3 +Sequential Image Operators [IMADDS,IMADDR,IMSTATR,IMSHIFTR] + + These benchmarks measure the time required by typical image operations. +All tests should be performed on 512 square test images created with the +\fBimdebug\fR package. The \fBimages\fR package will already have been +loaded by the \fBbench\fR package. Enter the following commands to create +the test images. + +.nf + cl> imdebug + cl> mktest pix.s s 2 "512 512" + cl> mktest pix.r r 2 "512 512" +.fi + +The following benchmarks should be run on these test images. Delete the +output images after each benchmark is run. Each benchmark should be run +several times, discarding the first timing and averaging the remaining +timings for the final result. +.ls +.ls [IMADDS] +cl> $imarith pix.s + 5 pix2.s +.le +.ls [IMADDR] +cl> $imarith pix.r + 5 pix2.r +.le +.ls [IMSTATR] +cl> $imstat pix.r +.le +.ls [IMSHIFTR] +cl> $imshift pix.r pix2.r .33 .44 interp=spline3 +.le +.le + +The IMADD benchmarks test the efficiency of the image i/o system, including +binary file i/o, and provide an indication of how long a simple disk to disk +image operation takes on the system in question. This benchmark should be +i/o bound on most systems. The IMSTATR and IMSHIFTR benchmarks are expected +to be cpu bound, and test primarily the quality of the code generated by the +host Fortran compiler. Note that the IMSHIFTR benchmark employs a true two +dimensional bicubic spline, hence the timings are a factor of 4 greater than +one would expect if a one dimensional interpolator were used to shift the two +dimensional image. + +.nh 3 +Image Load [IMLOAD,IMLOADF] + + To run the image load benchmarks, first load the \fBtv\fR package and +display something to get the x_display.e process into the process cache. +Run the following two benchmarks, displaying the test image PIX.S (this image +contains a test pattern of no interest). +.ls +.ls [IMLOAD] +cl> $display pix.s 1 +.le +.ls [IMLOADF] +cl> $display pix.s 1 zt=none +.le +.le + +The IMLOAD benchmark measures how long it takes for a normal image load on +the host system, including the automatic determination of the greyscale +mapping, and the time required to map and clip the image pixels into the +8 bits (or whatever) displayable by the image display. 
This benchmark +measures primarily the cpu speed and i/o bandwidth of the host system. +The IMLOADF benchmark eliminates the cpu intensive greyscale transformation, +yielding the minimum image display time for the host system. + +.nh 3 +Image Transpose [IMTRAN] + + To run this benchmark, transpose the image PIX.S, placing the output in a +new image. + + cl> $imtran pix.s pix2.s + +This benchmark tests the ability of a process to grab a large amount of +physical memory (large working set), and the speed with which the host system +can service random rather than sequential file access requests. + +.nh 2 +Specialized Benchmarks + + The next few benchmarks are implemented as tasks in the \fBbench\fR package, +located in the directory "pkg$bench". This package is not installed as a +predefined package as the standard IRAF packages are. Since this package is +used infrequently the binaries may have been deleted; if the file x_bench.e is +not present in the \fIbench\fR directory, rebuild it as follows: + +.nf + cl> cd pkg$bench + cl> mkpkg +.fi + +To load the package, enter the following commands. It is not necessary to +\fIcd\fR to the bench directory to load or run the package. + +.nf + cl> task $bench = "pkg$bench/bench.cl" + cl> bench +.fi + +This defines the following benchmark tasks. There are no manual pages for +these tasks; the only documentation is what you are reading. + +.ks +.nf + fortask - foreign task execution + getpar - get parameter; tests IPC overhead + plots - make line plots from an image + ptime - no-op task (prints the clock time) + rbin - read binary file; tests FIO bandwidth + rrbin - raw (unbuffered) binary file read + rtext - read text file; tests text file i/o speed + subproc - subprocess connect/disconnect + wbin - write binary file; tests FIO bandwidth + wipc - write to IPC; tests IPC bandwidth + wtext - write text file; tests text file i/o speed +.fi +.ke + +.nh 3 +Subprocess Connect/Disconnect [SUBPR] + + To run the SUBPR benchmark, enter the following command. +This will connect and disconnect the x_images.e subprocess 10 times. +Difference the starting and final times printed as the task output to get +the results of the benchmark. The cpu time measurement may be meaningless +(very small) on some systems. + + cl> subproc 10 + +This benchmark measures the time required to connect and disconnect an +IRAF subprocess. This includes not only the host time required to spawn +and later shutdown a process, but also the time required by the IRAF VOS +to set up the IPC channels, initialize the VOS i/o system, initialize the +environment in the subprocess, and so on. A portion of the subprocess must +be paged into memory to execute all this initialization code. The host system +overhead to spawn a subprocess and fault in a portion of its address space +is a major factor in this benchmark. + +.nh 3 +IPC Overhead [IPCO] + + The \fBgetpar\fR task is a compiled task in x_bench.e. The task will +fetch the value of a CL parameter 100 times. + + cl> $getpar 100 + +Since each parameter access consists of a request sent to the CL by the +subprocess, followed by a response from the CL process, with a negligible +amount of data being transferred in each call, this tests the IPC overhead. + +.nh 3 +IPC Bandwidth [IPCB] + + To run this benchmark enter the following command. The \fBwipc\fR task +is a compiled task in x_bench.e. + + cl> $wipc 1E6 > dev$null + +This writes approximately 1 Mb of binary data via IPC to the CL, which discards +the data (writes it to the null file via FIO). 
Since no actual disk file i/o is +involved, this tests the efficiency of the IRAF pseudofile i/o system and of the +host system IPC facility. + +.nh 3 +Foreign Task Execution [FORTSK] + + To run this benchmark enter the following command. The \fBfortask\fR +task is a CL script task in the \fBbench\fR package. + + cl> fortask 10 + +This benchmark executes the standard IRAF foreign task \fBrmbin\fR (one of the +bootstrap utilities) 10 times. The task is called with no arguments and does +nothing other than execute, print out its "usage" message, and shut down. +This tests the time required to execute a host system task from within the +IRAF environment. Only the clock time measurement is meaningful. + +.nh 3 +Binary File I/O [WBIN,RBIN,RRBIN] + + To run these benchmarks, load the \fBbench\fR package, and then enter the +following commands. The \fBwbin\fR, \fBrbin\fR and \fBrrbin\fR tasks are +compiled tasks in x_bench.e. A binary file named BINFILE is created in the +current directory by WBIN, and should be deleted after the benchmark has been +run. Each benchmark should be run at least twice before recording the time +and moving on to the next benchmark. Successive calls to WBIN will +automatically delete the file and write a new one. + +.nf + cl> $wbin binfile 5E6 + cl> $rbin binfile + cl> $rrbin binfile + cl> delete binfile # (not part of the benchmark) +.fi + +These benchmarks measure the time required to write and then read a binary disk +file approximately 5 Mb in size. This benchmark measures the binary file i/o +bandwidth of the FIO interface (for sequential i/o). In WBIN and RBIN the +common buffered READ and WRITE requests are used, hence some memory to memory +copying is included in the overhead measured by the benchmark. The RRBIN +benchmark uses ZARDBF to read the file in chunks of 32768 bytes, giving an +estimate of the maximum i/o bandwidth for the system. + +.nh 3 +Text File I/O [WTEXT,RTEXT] + + To run these benchmarks, load the \fBbench\fR package, and then enter the +following commands. The \fBwtext\fR and \fBrtext\fR tasks are compiled tasks +in x_bench.e. A text file named TEXTFILE is created in the current directory +by WTEXT, and should be deleted after the benchmarks have been run. +Successive calls to WTEXT will automatically delete the file and write a new +one. + +.nf + cl> $wtext textfile 1E6 + cl> $rtext textfile + cl> delete textfile # (not part of the benchmark) +.fi + +These benchmarks measure the time required to write and then read a text disk +file approximately one megabyte in size (15,625 64 character lines). +This benchmark measures the efficiency with which the system can sequentially +read and write text files. Since text file i/o requires the system to pack +and unpack records, text i/o tends to be cpu bound. + +.nh 3 +Network I/O [NWBIN,NRBIN,NWNULL,NWTEXT,NRTEXT] + + These benchmarks are equivalent to the binary and text file benchmarks +just discussed, except that the binary and text files are accessed on a +remote node via the IRAF network interface. The calling sequences are +identical except that an IRAF network filename is given instead of referencing +a file in the current directory. For example, the following commands would +be entered to run the network binary file benchmarks on node LYRA (the node +name and filename are site dependent). 
+ +.nf + cl> $wbin lyra!/tmp3/binfile 5E6 [NWBIN] + cl> $rbin lyra!/tmp3/binfile [NRBIN] + cl> $wbin lyra!/dev/null 5E6 [NWNULL] + cl> delete lyra!/tmp3/binfile +.fi + +The text file benchmarks are equivalent with the obvious changes, i.e., +substitute "text" for "bin", "textfile" for "binfile", and omit the null +textfile benchmark. The type of network interface used (TCP/IP, DECNET, etc.), +and the characteristics of the remote node should be recorded. + +These benchmarks test the bandwidth of the IRAF network interfaces for binary +and text files, as well as the limiting speed of the network itself (NWNULL). +The binary file benchmarks should be i/o bound. NWBIN should outperform +NRBIN since a network write is a pipelined operation, whereas a network read +is (currently) a synchronous operation. Text file access may be either cpu +or i/o bound depending upon the relative speeds of the network and host cpus. +The IRAF network interface buffers textfile i/o to minimize the number of +network packets and maximize the i/o bandwidth. + +.nh 3 +Task, IMIO, GIO Overhead [PLOTS] + + The \fBplots\fR task is a CL script task which calls the \fBprow\fR task +repeatedly to plot the same line of an image. The graphics output is +discarded (directed to the null file) rather than plotted since otherwise +the results of the benchmark would be dominated by the plotting speed of the +graphics terminal. + + cl> plots pix.s 10 + +This is a complex benchmark. The benchmark measures the overhead of task +(not process) execution and the overhead of the IMIO and GIO subsystems, +as well as the speed with which IPC can be used to pass parameters to a task +and return the GIO graphics metacode to the CL. + +The \fBprow\fR task is all overhead and is not normally used to interactively +plot image lines (\fBimplot\fR is what is normally used), but it is a good +task to use for a benchmark since it exercises the subsystems most commonly +used in scientific tasks. The \fBprow\fR task has a couple dozen parameters +(mostly hidden), must open the image to read the image line to be plotted +on every call, and must open the GIO graphics device on every call as well. + +.nh 3 +System Loading [2USER,4USER] + + This benchmark attempts to measure the response of the system as the +load increases. This is done by running large \fBplots\fR jobs on several +terminals and then repeating the 10 plots \fBplots\fR benchmark. +For example, to run the 2USER benchmark, login on a second terminal and +enter the following command, and then repeat the PLOTS benchmark discussed +in the last section. Be sure to use a different login or login directory +for each "user", to avoid concurrency problems, e.g., when reading the +input image or updating parameter files. + + cl> plots pix.s 9999 + +Theoretically, the timings should be approximately .5 (2USER) and .25 (4USER) +as fast as when the PLOTS benchmark was run on a single user system, assuming +that cpu time is the limiting resource and that a single job is cpu bound. +In a case where there is more than one limiting resource, e.g., disk seeks as +well as cpu cycles, performance will fall off more rapidly. If, on the other +hand, a single user process does not keep the system busy, e.g., because +synchronous i/o is used, performance will fall off less rapidly. If the +system unexpectedly runs out of some critical system resource, e.g., physical +memory or some internal OS buffer space, performance may be much worse than +expected. 
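As a worked example (timings taken from the UNIX 11/750 entry in Appendix 1):
the single user PLOTS benchmark ran in 29 seconds of clock time, so the
scaling argument above predicts roughly 58 and 116 seconds for the 2USER and
4USER cases if a single job were purely cpu bound.

.nf
	PLOTS (1 user)	0:29	measured
	2USER		0:44	predicted 2 x 0:29 = 0:58
	4USER		1:19	predicted 4 x 0:29 = 1:56
.fi

The measured times fall off less rapidly than the naive prediction,
suggesting that a single PLOTS job does not keep that machine fully busy.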
+ +If the multiuser performance is poorer than expected it may be possible to +improve the system performance significantly once the reason for the poor +performance is understood. If disk seeks are the problem it may be possible +to distribute the load more evenly over the available disks. If the +performance decays linearly as more users are added and then gets really bad, +it is probably because some critical system resource has run out. Use the +system monitoring tools provided with the host operating system to try to +identify the critical resource. It may be possible to modify the system +tuning parameters to fix the problem, once the critical resource has been +identified. + +.nh +Interpreting the Benchmark Results + + Many factors determine the timings obtained when the benchmarks are run +on a system. These factors include all of the following: + +.ls +.ls o +The hardware configuration, e.g., cpu used, clock speed, availability of +floating point hardware, type of floating point hardware, amount of memory, +number and type of disks, degree of fragmentation of the disks, bus bandwidth, +disk controller bandwidth, memory controller bandwidth for memory mapped DMA +transfers, and so on. +.le +.ls o +The host operating system, including the version number, tuning parameters, +user quotas, working set size, files system parameters, Fortran compiler +characteristics, level of optimization used to compile IRAF, and so on. +.le +.ls o +The version of IRAF being run. On a VMS system, are the images "installed" +to permit shared memory and reduce physical memory usage? Were the programs +compiled with the code optimizer, and if so, what compiler options were used? +Are shared libraries used if available on the host system? +.le +.ls o +Other activity in the system when the benchmarks were run. If there were no +other users on the machine at the time, how about batch jobs? If the machine +is on a cluster or network, were other nodes accessing the same disks? +How many other processes were running on the local node? Ideally, the +benchmarks should be run on an otherwise idle system, else the results may be +meaningless or next to impossible to interpret. Given some idea of how the +host system responds to loading, it is possible to estimate how a timing +will scale as the system is loaded, but the reverse operation is much more +difficult. +.le +.le + + +Because so many factors contribute to the results of a benchmark, it can be +difficult to draw firm conclusions from any benchmark, no matter how simple. +The hardware and software in modern computer systems is so complicated that +it is difficult even for an expert with a detailed knowledge and understanding +of the full system to explain in detail where the time is going, even when +running the simplest benchmark. On some recent message based multiprocessor +systems it is probably impossible to fully comprehend what is going on at any +given time, even if one fully understands how the system works, because of the +dynamic nature of such systems. + +Despite these difficulties, the benchmarks do provide a coarse measure of the +relative performance of different host systems, as well as some indication of +the efficiency of the IRAF VOS. The benchmarks are designed to measure the +performance of the \fIhost system\fR (both hardware and software) in a number +of important areas, all of which play a role in determining the suitability of +a system for scientific data processing. 
The benchmarks are \fInot\fR +designed to measure the efficiency of the IRAF software itself (except parts +of the VOS), e.g., there is no measure of the time taken by the CL to compile +and execute a script, no measure of the speed of the median algorithm or of +an image transpose, and so on. These timings are also important, of course, +but should be measured separately. Also, measurements of the efficiency of +individual applications programs are much less critical than the performance +criteria dealt with here, since it is relatively easy to optimize an +inefficient or poorly designed applications program, even a complex one like +the CL, but there is generally little one can do about the host system. + +The timings for the benchmarks for a number of host systems are given in the +appendices which follow. Sometimes there will be more than one set of +benchmarks for a given host system, e.g., because the system provided two or +more disks or floating point options with different levels of performance. +The notes at the end of each set of benchmarks are intended to document any +special features or problems of the host system which may have affected the +results. In general we did not bother to record things like system tuning +parameters, working set, page faults, etc., unless these were considered an +important factor in the benchmarks. In particular, few IRAF programs page +fault other than during process startup, hence this is rarely a significant +factor when running these benchmarks (except possibly in IMTRAN). + +Detailed results for each configuration of each host system are presented on +separate pages in the Appendices. A summary table showing the results of +selected benchmarks for all host systems at once is also provided. +The system characteristic or characteristics principally measured by each +benchmark is noted in the table below. This is only approximate, e.g., the +MIPS rating is a significant factor in all but the most i/o bound benchmarks. + +.ks +.nf + benchmark responsiveness mips flops i/o + + CLSS * + MKPKGV * + MKHDB * * + PLOTS * * + IMADDS * * + IMADDR * * + IMSTATR * + IMSHIFTR * + IMTRAN * + WBIN * + RBIN * +.fi +.ke + + +By \fIresponsiveness\fR we refer to the interactive response of the system +as perceived by the user. A system with a good interactive response will do +all the little things very fast, e.g., directory listings, image header +listings, plotting from an image, loading new packages, starting up a new +process, and so on. Machines which score high in this area will seem fast +to the user, whereas machines which score poorly will \fIseem\fR slow, +sometimes frustratingly slow, even though they may score high in the areas +of floating point performance, or i/o bandwidth. The interactive response +of a system obviously depends upon the MIPS rating of the system (see below), +but an often more significant factor is the design and computational complexity +of the host operating system itself, in particular the time taken by the host +operating system to execute system calls. Any system which spends a large +fraction of its time in kernel mode will probably have poor interactive +response. The response of the system to loading is also very important, +i.e., if the system has trouble with load balancing as the number of users +(or processes) increases, response will become increasingly erratic until the +interactive response is hopelessly poor. 
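The differences show up clearly in the responsiveness related benchmarks.
For example, comparing the clock timings reported in Appendix 1 for two of
the hosts tested:

.nf
	host			CLSS	SUBPR		FORTSK
	VAX 11/750 (UNIX)	0:17	2.0 sec/proc	0.6 sec/cmd
	SUN 3/160C		0:03	0.7 sec/proc	0.2 sec/cmd
.fi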
+ +The MIPS column refers to the raw speed of the system when executing arbitrary +code containing a mixture of various types of instructions, but little floating +point, i/o, or system calls. A machine with a high MIPS rating will have a +fast cpu, e.g., a fast clock rate, fast memory access time, large cache memory, +and so on, as well as a good optimizing Fortran compiler. Assuming good +compilers, the MIPS rating is primarily a measure of the hardware speed of +the host machine, but all of the MIPS related benchmarks presented here also +make a significant number of system calls (MKHDB, for example, does a lot of +files accesses and text file i/o), hence it is not that simple. Perhaps a +completely cpu bound pure-MIPS benchmark should be added to our suite of +benchmarks (the MIPS rating of every machine is generally well known, however). + +The FLOPS column identifies those benchmarks which do a significant amount of +floating point computation. The IMSHIFTR and IMSTATR benchmarks in particular +are heavily into floating point. These benchmarks measure the single +precision floating point speed of the host system hardware, as well as the +effectiveness of do-loop optimization by the host Fortran compiler. +The degree of optimization provided by the Fortran compiler can affect the +timing of these benchmarks by up to a factor of two. Note that the sample is +very small, and if a compiler fails to optimize the inner loop of one of these +benchmark programs, the situation may be reversed when running some other +benchmark. Any reasonable Fortran compiler should be able to optimize the +inner loop of the IMADDR benchmark, so the CPU timing for this benchmark is +a good measure of the hardware floating point speed, if one allows for do-loop +overhead, memory i/o, and the system calls necessary to access the image on +disk. + +The I/O column identifies those benchmarks which are i/o bound and which +therefore provide some indication of the i/o bandwidth of the host system. +The i/o bandwidth actually achieved in these benchmarks depends upon +many factors, the most important of which are the host operating system +software (files system data structures and i/o software, disk drivers, etc.) +and the host system hardware, i.e., disk type, disk controller type, bus +bandwidth, and DMA memory controller bandwidth. Note that asynchronous i/o +is not currently used in these benchmarks, hence higher transfer rates are +probably possible in special cases (on a busy system all i/o is asynchronous +at the host system level anyway). Large transfers are used to minimize disk +seeks and synchronization delays, hence the benchmarks should provide a good +measure of the realistically achievable host i/o bandwidth. + +.bp + . +.sp 20 +.ce +APPENDIX 1. IRAF VERSION 2.5 BENCHMARKS +.ce +April-June 1987 + +.bp +.sh +UNIX/IRAF V2.5 4.3BSD UNIX, 8Mb memory, VAX 11/750+FPA RA81 (lyra) +.br +CPU times are given in seconds, CLK times in minutes and seconds. +.br +Wednesday, 1 April, 1987, Suzanne H. Jacoby, NOAO/Tucson + +.nf +\fBBenchmark CPU CLK Size Notes \fR + (user+sys) (m:ss) + +CLSS 7.4+2.6 0:17 CPU = user + system +MKPKGV 13.4+9.9 0:39 CPU = user + system +MKPKGC 135.1+40. 
3:46 CPU = user + system +MKHDB 22.79 0:40 [1] +IMADDS 3.31 0:10 512X512X16 +IMADDR 4.28 0:17 512X512X32 +IMSTATR 10.98 0:15 512X512X32 +IMSHIFTR 114.41 2:13 512X512X32 +IMLOAD 7.62 0:15 512X512X16 +IMLOADF 2.63 0:08 512X512X16 +IMTRAN 10.19 0:17 512X512X16 +SUBPR n/a 0:20 10 conn/discon 2.0 sec/proc +IPCO 0.92 0:07 100 getpars +IPCB 2.16 0:15 1E6 bytes 66.7 Kb/sec +FORTSK n/a 0:06 10 commands 0.6 sec/cmd +WBIN 4.32 0:24 5E6 bytes 208.3 Kb/sec +RBIN 4.08 0:24 5E6 bytes 208.3 Kb/sec +RRBIN 0.12 0:22 5E6 bytes 227.3 Kb/sec +WTEXT 37.30 0:42 1E6 bytes 23.8 Kb/sec +RTEXT 26.49 0:32 1E6 bytes 31.3 Kb/sec +NWBIN 4.64 1:43 5E6 bytes 48.5 Kb/sec [2] +NRBIN 6.49 1:34 5E6 bytes 53.2 Kb/sec [2] +NWNULL 4.91 1:21 5E6 bytes 61.7 Kb/sec [2] +NWTEXT 44.03 1:02 1E6 bytes 16.1 Kb/sec [2] +NRTEXT 31.38 2:04 1E6 bytes 8.1 Kb/sec [2] +PLOTS n/a 0:29 10 plots 2.9 sec/PROW +2USER n/a 0:44 10 plots 4.4 sec/PROW +4USER n/a 1:19 10 plots 7.9 sec/PROW +.fi + + +Notes: +.ls [1] +All cpu timings from MKHDB on do not include the "system" time. +.le +.ls [2] +The remote node used for the network tests was aquila, a VAX 11/750 running +4.3 BSD UNIX. The network protocol used was TCP/IP. +.le + +.bp +.sh +UNIX/IRAF V2.5 SUN UNIX 3.3, SUN 3/160C, (tucana) +.br +16 MHz 68020, 68881 fpu, 8Mb, 2-380Mb Fujitsu Eagle disks +.br +Friday, June 12, 1987, Suzanne H. Jacoby, NOAO/Tucson + +.nf +\fBBenchmark CPU CLK Size Notes \fR + (user+sys) (m:ss) + +CLSS 2.0+0.8 0:03 CPU = user + system +MKPKGV 3.2+4.5 0:17 CPU = user + system +MKPKGC 59.1+26.2 2:13 CPU = user + system +MKHDB 5.26 0:10 [1] +IMADDS 0.62 0:03 512X512X16 +IMADDR 3.43 0:09 512X512X32 +IMSTATR 8.38 0:11 512X512X32 +IMSHIFTR 83.44 1:33 512X512X32 +IMLOAD 6.78 0:11 512X512X16 +IMLOADF 1.21 0:03 512X512X16 +IMTRAN 1.47 0:05 512X512X16 +SUBPR n/a 0:07 10 conn/discon 0.7 sec/proc +IPCO 0.16 0:02 100 getpars +IPCB 0.70 0:05 1E6 bytes 200.0 Kb/sec +FORTSK n/a 0:02 10 commands 0.2 sec/cmd +WBIN 2.88 0:08 5E6 bytes 625.0 Kb/sec +RBIN 2.58 0:11 5E6 bytes 454.5 Kb/sec +RRBIN 0.01 0:10 5E6 bytes 500.0 Kb/sec +WTEXT 9.20 0:10 1E6 bytes 100.0 Kb/sec +RTEXT 6.75 0:07 1E6 bytes 142.8 Kb/sec +NWBIN 2.65 1:04 5E6 bytes 78.1 Kb/sec [2] +NRBIN 3.42 1:16 5E6 bytes 65.8 Kb/sec [2] +NWNULL 2.64 1:01 5E6 bytes 82.0 Kb/sec [2] +NWTEXT 11.92 0:39 1E6 bytes 25.6 Kb/sec [2] +NRTEXT 7.41 1:24 1E6 bytes 11.9 Kb/sec [2] +PLOTS n/a 0:09 10 plots 0.9 sec/PROW +2USER n/a 0:16 10 plots 1.6 sec/PROW +4USER n/a 0:35 10 plots 3.5 sec/PROW +.fi + + +Notes: +.ls [1] +All timings from MKHDB on do not include the "system" time. +.le +.ls [2] +The remote node used for the network tests was aquila, a VAX 11/750 +running 4.3BSD UNIX. The network protocol used was TCP/IP. +.le + +.bp +.sh +UNIX/IRAF V2.5 SUN UNIX 3.3, SUN 3/160C + FPA (KPNO 4 meter system) +.br +16 MHz 68020, Sun-3 FPA, 8Mb, 2-380Mb Fujitsu Eagle disks +.br +Friday, June 12, 1987, Suzanne H. 
Jacoby, NOAO/Tucson + +.nf +\fBBenchmark CPU CLK Size Notes \fR + (user+sys) (m:ss) + +CLSS 1.9+0.7 0:04 CPU = user + system +MKPKGV 3.1+3.9 0:19 CPU = user + system +MKPKGC 66.2+20.3 2:06 CPU = user + system +MKHDB 5.30 0:11 [1] +IMADDS 0.63 0:03 512X512X16 +IMADDR 0.86 0:06 512X512X32 +IMSTATR 5.08 0:08 512X512X32 +IMSHIFTR 31.06 0:36 512X512X32 +IMLOAD 2.76 0:06 512X512X16 +IMLOADF 1.22 0:03 512X512X16 +IMTRAN 1.46 0:04 512X512X16 +SUBPR n/a 0:06 10 conn/discon 0.6 sec/proc +IPCO 0.16 0:01 100 getpars +IPCB 0.60 0:05 1E6 bytes 200.0 Kb/sec +FORTSK n/a 0:02 10 commands 0.2 sec/cmd +WBIN 2.90 0:07 5E6 bytes 714.3 Kb/sec +RBIN 2.54 0:11 5E6 bytes 454.5 Kb/sec +RRBIN 0.03 0:10 5E6 bytes 500.0 Kb/sec +WTEXT 9.20 0:11 1E6 bytes 90.9 Kb/sec +RTEXT 6.70 0:08 1E6 bytes 125.0 Kb/sec +NWBIN n/a +NRBIN n/a [3] +NWNULL n/a +NWTEXT n/a +NRTEXT n/a +PLOTS n/a 0:06 10 plots 0.6 sec/PROW +2USER n/a 0:10 10 plots 1.0 sec/PROW +4USER n/a 0:26 10 plots 2.6 sec/PROW +.fi + + +Notes: +.ls [1] +All timings from MKHDB on do not include the "system" time. +.le + +.bp +.sh +UNIX/IRAF V2.5, SUN UNIX 3.2, SUN 3/160 (taurus) +.br +16 MHz 68020, Sun-3 FPA, 16 Mb, SUN SMD disk 280 Mb +.br +7 April 1987, Skip Schaller, Steward Observatory, University of Arizona + +.nf +\fBBenchmark CPU CLK Size Notes\fR + (user+sys) (m:ss) + +CLSS 01.2+01.1 0:03 +MKPKGV 03.2+10.1 0:18 +MKPKGC 65.4+25.7 2:03 +MKHDB 5.4 0:18 +IMADDS 0.6 0:04 512x512x16 +IMADDR 0.9 0:07 512x512x32 +IMSTATR 11.4 0:13 512x512x32 +IMSHIFTR 30.1 0:34 512x512x32 +IMLOAD (not available) +IMLOADF (not available) +IMTRAN 1.4 0:04 512x512x16 +SUBPR - 0:07 10 conn/discon 0.7 sec/proc +IPCO 0.1 0:02 100 getpars +IPCB 0.8 0:05 1E6 bytes 200.0 Kb/sec +FORTSK - 0:03 10 commands 0.3 sec/cmd +WBIN 2.7 0:14 5E6 bytes 357.1 Kb/sec +RBIN 2.5 0:09 5E6 bytes 555.6 Kb/sec +RRBIN 0.1 0:06 5E6 bytes 833.3 Kb/sec +WTEXT 9.0 0:10 1E6 bytes 100.0 Kb/sec +RTEXT 6.4 0:07 1E6 bytes 142.9 Kb/sec +NWBIN 2.8 1:08 5E6 bytes 73.5 Kb/sec +NRBIN 3.1 1:25 5E6 bytes 58.8 Kb/sec +NWNULL 2.7 0:55 5E6 bytes 90.9 Kb/sec +NWTEXT 12.3 0:44 1E6 bytes 22.7 Kb/sec +NRTEXT 7.7 1:45 1E6 bytes 9.5 Kb/sec +PLOTS - 0:07 10 plots 0.7 sec/PROW +2USER - 0:13 +4USER - 0:35 +.fi + + +Notes: +.ls [1] +The remote node used for the network tests was carina, a VAX 11/750 +running 4.3 BSD UNIX. The network protocol used was TCP/IP. 
.le

.bp
.sh
Integrated Solutions (ISI), Lick Observatory
.br
16-Mhz 68020, 16-Mhz 68881 fpu, 8Mb Memory
.br
IRAF compiled with Greenhills compilers without -O optimization
.br
Thursday, 14 May, 1987, Richard Stover, Lick Observatory

.nf
\fBBenchmark     CPU         CLK     Size            Notes\fR
               (user+sys)  (m:ss)

CLSS           1.6+0.7     0:03
MKPKGV         3.1+4.6     0:25
MKPKGC         40.4+11.6   1:24
MKHDB          6.00        0:17
IMADDS         0.89        0:05    512X512X16
IMADDR         3.82        0:10    512X512X32
IMSTATR        7.77        0:10    512X512X32
IMSHIFTR       81.60       1:29    512X512X32
IMLOAD         n/a
IMLOADF        n/a
IMTRAN         1.62        0:06    512X512X16
SUBPR          n/a         0:05    10 conn/discon  0.5 sec/proc
IPCO           0.27        0:02    100 getpars
IPCB           1.50        0:08    1E6 bytes       125.0 Kb/sec
FORTSK         n/a         0:13    10 commands     1.3 sec/cmd
WBIN           4.82        0:17    5E6 bytes       294.1 Kb/sec
RBIN           4.63        0:18    5E6 bytes       277.8 Kb/sec
RRBIN          0.03        0:13    5E6 bytes       384.6 Kb/sec
WTEXT          17.10       0:19    1E6 bytes       45.5 Kb/sec
RTEXT          7.40        0:08    1E6 bytes       111.1 Kb/sec
NWBIN          n/a
NRBIN          n/a
NWNULL         n/a
NWTEXT         n/a
NRTEXT         n/a
PLOTS          n/a         0:10    10 plots        1.0 sec/PROW
2USER          n/a
4USER          n/a
.fi


Notes:
.ls [1]
An initial attempt to bring IRAF up on the ISI using the ISI C and Fortran
compilers failed due to there being too many bugs in these compilers, so
the system was brought up using the Greenhills compilers.
.le

.bp
.sh
ULTRIX/IRAF V2.5, ULTRIX 1.2, VAXStation II/GPX (gll1)
.br
5Mb memory, 150 Mb RD54 disk
.br
Thursday, 21 May, 1987, Suzanne H. Jacoby, NOAO/Tucson

.nf
\fBBenchmark     CPU         CLK     Size            Notes\fR
               (user+sys)  (m:ss)

CLSS           4.2+1.8     0:09    CPU = user + system
MKPKGV         9.8+6.1     0:37    CPU = user + system
MKPKGC         96.8+24.4   3:15    CPU = user + system
MKHDB          15.50       0:38    [1]
IMADDS         2.06        0:09    512X512X16
IMADDR         2.98        0:17    512X512X32
IMSTATR        10.98       0:16    512X512X32
IMSHIFTR       95.61       1:49    512X512X32
IMLOAD         6.90        0:17    512X512X16      [2]
IMLOADF        2.58        0:10    512X512X16      [2]
IMTRAN         4.93        0:16    512X512X16
SUBPR          n/a         0:19    10 conn/discon  1.9 sec/proc
IPCO           0.47        0:03    100 getpars
IPCB           1.21        0:07    1E6 bytes       142.9 Kb/sec
FORTSK         n/a         0:08    10 commands     0.8 sec/cmd
WBIN           1.97        0:29    5E6 bytes       172.4 Kb/sec
RBIN           1.73        0:24    5E6 bytes       208.3 Kb/sec
RRBIN          0.08        0:24    5E6 bytes       208.3 Kb/sec
WTEXT          25.43       0:27    1E6 bytes       37.0 Kb/sec
RTEXT          16.65       0:18    1E6 bytes       55.5 Kb/sec
NWBIN          2.24        1:26    5E6 bytes       58.1 Kb/sec   [3]
NRBIN          2.66        1:43    5E6 bytes       48.5 Kb/sec   [3]
NWNULL         2.22        2:21    5E6 bytes       35.5 Kb/sec   [3]
NWTEXT         27.16       2:43    1E6 bytes       6.1 Kb/sec    [3]
NRTEXT         17.44       2:17    1E6 bytes       7.3 Kb/sec    [3]
PLOTS          n/a         0:20    10 plots        2.0 sec/PROW
2USER          n/a         0:30    10 plots        3.0 sec/PROW
4USER          n/a         0:51    10 plots        5.1 sec/PROW
.fi


Notes:
.ls [1]
All cpu timings from MKHDB on do not include the "system" time.
.le
.ls [2]
Since there is no image display on this node, the image display benchmarks
were run using the IIS display on node lyra via the network interface.
.le
.ls [3]
The remote node used for the network tests was lyra, a VAX 11/750 running
4.3 BSD UNIX. The network protocol used was TCP/IP.
.le
.ls [4]
Much of the hardware and software for this system was provided courtesy of
DEC so that we may better support IRAF on the microvax.
.le

.bp
.sh
VMS/IRAF V2.5, VMS V4.5, 28Mb, VAX 8600 RA81/Clustered (draco)
.br
Friday, 15 May, 1987, Suzanne H. Jacoby, NOAO/Tucson

.nf
\fBBenchmark     CPU         CLK     Size            Notes\fR
               (user+sys)  (m:ss)

CLSS           2.87        0:08
MKPKGV         33.57       1:05
MKPKGC         3.26        1:16
MKHDB          8.59        0:17
IMADDS         1.56        0:05    512X512X16
IMADDR         1.28        0:07    512X512X32
IMSTATR        2.09        0:04    512X512X32
IMSHIFTR       13.54       0:32    512X512X32
IMLOAD         2.90        0:10    512X512X16      [1]
IMLOADF        1.04        0:08    512X512X16      [1]
IMTRAN         2.58        0:06    512X512X16
SUBPR          n/a         0:27    10 conn/discon  2.7 sec/proc
IPCO           0.00        0:02    100 getpars
IPCB           0.04        0:06    1E6 bytes       166.7 Kb/sec
FORTSK         n/a         0:13    10 commands     1.3 sec/cmd
WBIN           1.61        0:17    5E6 bytes       294.1 Kb/sec
RBIN           1.07        0:08    5E6 bytes       625.0 Kb/sec
RRBIN          0.34        0:08    5E6 bytes       625.0 Kb/sec
WTEXT          10.62       0:17    1E6 bytes       58.8 Kb/sec
RTEXT          4.64        0:06    1E6 bytes       166.7 Kb/sec
NWBIN          2.56        2:00    5E6 bytes       41.7 Kb/sec   [2]
NRBIN          5.67        1:57    5E6 bytes       42.7 Kb/sec   [2]
NWNULL         2.70        1:48    5E6 bytes       46.3 Kb/sec   [2]
NWTEXT         12.06       0:47    1E6 bytes       21.3 Kb/sec   [2]
NRTEXT         10.10       1:41    1E6 bytes       9.9 Kb/sec    [2]
PLOTS          n/a         0:09    10 plots        0.9 sec/PROW
2USER          n/a         0:10    10 plots        1.0 sec/PROW
4USER          n/a         0:18    10 plots        1.8 sec/PROW
.fi


Notes:
.ls [1]
The image display was accessed via the network (IRAF TCP/IP network interface,
Wollongong TCP/IP package for VMS), with the IIS image display residing on
node lyra and accessed via a UNIX/IRAF kernel server. The binary and text
file network tests also used lyra as the remote node.
.le
.ls [2]
The remote node for network benchmarks was aquila, a VAX 11/750 running
4.3BSD UNIX. Connection made via TCP/IP.
.le
.ls [3]
The system was linked using shared libraries and the IRAF executables for
the cl and system tasks, as well as the shared library, were "installed"
using the VMS INSTALL utility.
.le
.ls [4]
The high value of the IPC bandwidth for VMS is due to the use of shared
memory. Mailboxes were considerably slower and are no longer used.
.le
.ls [5]
The foreign task interface uses mailboxes to talk to a DCL run as a
subprocess and should be considerably faster than it is. It is slow at
present due to the need to call SET MESSAGE before and after the user
command to disable pointless DCL error messages having to do with
logical names.
.le

.bp
.sh
VMS/IRAF V2.5, VAX 11/780, VMS V4.5, 16Mb memory, RA81 disks (wfpct1)
.br
Tuesday, 19 May, 1987, Suzanne H. Jacoby, NOAO/Tucson

.nf
\fBBenchmark     CPU         CLK     Size            Notes\fR
               (user+sys)  (m:ss)

CLSS           7.94        0:15
MKPKGV         102.49      2:09
MKPKGC         9.50        2:22
MKHDB          26.10       0:31
IMADDS         3.57        0:10    512X512X16
IMADDR         4.22        0:17    512X512X32
IMSTATR        6.78        0:10    512X512X32
IMSHIFTR       45.11       0:57    512X512X32
IMLOAD         n/a
IMLOADF        n/a
IMTRAN         7.83        0:14    512X512X16
SUBPR          n/a         0:53    10 conn/discon  5.3 sec/proc
IPCO           0.02        0:03    100 getpars
IPCB           0.17        0:10    1E6 bytes       100.0 Kb/sec
FORTSK         n/a         0:20    10 commands     2.0 sec/cmd
WBIN           4.52        0:30    5E6 bytes       166.7 Kb/sec
RBIN           3.90        0:19    5E6 bytes       263.2 Kb/sec
RRBIN          1.23        0:17    5E6 bytes       294.1 Kb/sec
WTEXT          37.99       0:50    1E6 bytes       20.0 Kb/sec
RTEXT          18.52       0:19    1E6 bytes       52.6 Kb/sec
NWBIN          n/a
NRBIN          n/a
NWNULL         n/a
NWTEXT         n/a
NRTEXT         n/a
PLOTS          n/a         0:19    10 plots        1.9 sec/PROW
2USER          n/a         0:31    10 plots        3.1 sec/PROW
4USER          n/a         1:04    10 plots        6.4 sec/PROW
.fi


Notes:
.ls [1]
The Unibus interface used for the RA81 disks for these benchmarks is
notoriously slow, hence the i/o bandwidth of the system as tested was
probably significantly worse than many sites would experience (using
disks on the faster Massbus interface).
.le

.bp
.sh
VMS/IRAF V2.5, VAX 11/780, VMS V4.5 (wfpct1)
.br
16Mb memory, IRAF installed on RA81 disks, data on RM03/Massbus [1].
.br
Tuesday, 9 June, 1987, Suzanne H. Jacoby, NOAO/Tucson

.nf
\fBBenchmark     CPU         CLK     Size            Notes\fR
               (user+sys)  (m:ss)

CLSS           n/a
MKPKGV         n/a
MKPKGC         n/a
MKHDB          n/a
IMADDS         3.38        0:08    512X512X16
IMADDR         4.00        0:11    512X512X32
IMSTATR        6.88        0:08    512X512X32
IMSHIFTR       45.47       0:53    512X512X32
IMLOAD         n/a
IMLOADF        n/a
IMTRAN         7.71        0:12    512X512X16
SUBPR          n/a
IPCO           n/a
IPCB           n/a
FORTSK         n/a
WBIN           4.22        0:22    5E6 bytes       227.3 Kb/sec
RBIN           3.81        0:12    5E6 bytes       416.7 Kb/sec
RRBIN          0.98        0:09    5E6 bytes       555.6 Kb/sec
WTEXT          37.20       0:47    1E6 bytes       21.3 Kb/sec
RTEXT          17.95       0:18    1E6 bytes       55.6 Kb/sec
NWBIN          n/a
NRBIN          n/a
NWNULL         n/a
NWTEXT         n/a
NRTEXT         n/a
PLOTS          n/a         0:16    10 plots        1.6 sec/PROW
2USER
4USER
.fi

Notes:
.ls [1]
The data files were stored on an RM03 with 23 free Mb and a Massbus interface
for these benchmarks. Only those benchmarks which access the RM03 are given.
.le

.bp
.sh
VMS/IRAF V2.5, MicroVMS 4.5, VAXStation II/GPX (gll1)
.br
5Mb memory, 70Mb RD53 plus 300 Mb Maxstor with Emulex controller.
.br
Wednesday, 13 May, 1987, Suzanne H. Jacoby, NOAO/Tucson

.nf
\fBBenchmark     CPU         CLK     Size            Notes\fR
               (user+sys)  (m:ss)

CLSS           9.66        0:17
MKPKGV         109.26      2:16
MKPKGC         9.25        2:53
MKHDB          27.58       0:39
IMADDS         3.51        0:07    512X512X16
IMADDR         4.31        0:10    512X512X32
IMSTATR        9.31        0:11    512X512X32
IMSHIFTR       74.54       1:21    512X512X32
IMLOAD         n/a
IMLOADF        n/a
IMTRAN         10.81       0:27    512X512X16
SUBPR          n/a         0:53    10 conn/discon  5.3 sec/proc
IPCO           0.03        0:03    100 getpars
IPCB           0.13        0:07    1E6 bytes       142.8 Kb/sec
FORTSK         n/a         0:29    10 commands     2.9 sec/cmd
WBIN           3.29        0:16    5E6 bytes       312.5 Kb/sec
RBIN           2.38        0:10    5E6 bytes       500.0 Kb/sec
RRBIN          0.98        0:09    5E6 bytes       555.5 Kb/sec
WTEXT          41.00       0:53    1E6 bytes       18.9 Kb/sec
RTEXT          28.74       0:29    1E6 bytes       34.5 Kb/sec
NWBIN          8.28        0:46    5E6 bytes       108.7 Kb/sec  [1]
NRBIN          5.66        0:50    5E6 bytes       100.0 Kb/sec  [1]
NWNULL         8.39        0:42    5E6 bytes       119.0 Kb/sec  [1]
NWTEXT         30.21       0:33    1E6 bytes       30.3 Kb/sec   [1]
NRTEXT         20.05       0:38    1E6 bytes       26.3 Kb/sec   [1]
PLOTS                      0:16    10 plots        1.6 sec/plot
2USER                      0:26    10 plots        2.6 sec/plot
4USER
.fi

Notes:
.ls [1]
The remote node for the network tests was draco, a VAX 8600 running
V4.5 VMS. The network protocol used was DECNET.
.le
.ls [2]
Much of the hardware and software for this system was provided courtesy of
DEC so that we may better support IRAF on the microvax.
.le

.bp
.sh
VMS/IRAF V2.5, MicroVMS 4.5, VAXStation II/GPX (gll1)
.br
5 Mb memory, IRAF on 300 Mb Maxstor/Emulex, data on 70 Mb RD53 [1].
.br
Sunday, 31 May, 1987, Suzanne H. Jacoby, NOAO/Tucson.

.nf
\fBBenchmark     CPU         CLK     Size            Notes\fR
               (user+sys)  (m:ss)

CLSS           n/a         n/a
MKPKGV         n/a         n/a
MKPKGC         n/a         n/a
MKHDB          n/a         n/a
IMADDS         3.44        0:07    512X512X16
IMADDR         4.31        0:15    512X512X32
IMSTATR        9.32        0:12    512X512X32
IMSHIFTR       74.72       1:26    512X512X32
IMLOAD         n/a
IMLOADF        n/a
IMTRAN         10.83       0:35    512X512X16
SUBPR          n/a
IPCO           n/a
IPCB           n/a
FORTSK         n/a
WBIN           3.33        0:26    5E6 bytes       192.3 Kb/sec
RBIN           2.30        0:17    5E6 bytes       294.1 Kb/sec
RRBIN          0.97        0:11    5E6 bytes       294.1 Kb/sec
WTEXT          40.84       0:54    1E6 bytes       18.2 Kb/sec
RTEXT          27.99       0:28    1E6 bytes       35.7 Kb/sec
NWBIN          n/a
NRBIN          n/a
NWNULL         n/a
NWTEXT         n/a
NRTEXT         n/a
PLOTS                      0:17    10 plots        1.7 sec/plot
2USER          n/a
4USER          n/a
.fi


Notes:
.ls [1]
IRAF installed on a 300 Mb Maxstor with Emulex controller; data files on a
70Mb RD53. Only those benchmarks which access the RD53 disk are included.
.le

.bp
.sh
VMS/IRAF V2.5, VMS V4.5, VAX 11/750+FPA RA81/Clustered, 7.25 Mb (vela)
.br
Friday, 15 May 1987, Suzanne H. Jacoby, NOAO/Tucson

.nf
\fBBenchmark     CPU         CLK     Size            Notes\fR
               (user+sys)  (m:ss)

CLSS           14.11       0:27
MKPKGV         189.67      4:17
MKPKGC         18.08       3:44
MKHDB          46.54       1:11
IMADDS         5.90        0:11    512X512X16
IMADDR         6.48        0:14    512X512X32
IMSTATR        10.65       0:14    512X512X32
IMSHIFTR       69.62       1:33    512X512X32
IMLOAD         15.83       0:23    512X512X16
IMLOADF        6.08        0:13    512X512X16
IMTRAN         14.85       0:20    512X512X16
SUBPR          n/a         1:54    10 conn/discon  11.4 sec/proc
IPCO           1.16        0:06    100 getpars
IPCB           2.92        0:09    1E6 bytes       111.1 Kb/sec
FORTSK         n/a         0:33    10 commands     3.3 sec/cmd
WBIN           6.96        0:21    5E6 bytes       238.1 Kb/sec
RBIN           5.37        0:13    5E6 bytes       384.6 Kb/sec
RRBIN          1.86        0:10    5E6 bytes       500.0 Kb/sec
WTEXT          66.12       1:24    1E6 bytes       11.9 Kb/sec
RTEXT          32.06       0:36    1E6 bytes       27.7 Kb/sec
NWBIN          13.53       1:49    5E6 bytes       45.9 Kb/sec   [1]
NRBIN          19.52       2:06    5E6 bytes       39.7 Kb/sec   [1]
NWNULL         13.40       1:44    5E6 bytes       48.1 Kb/sec   [1]
NWTEXT         82.35       1:42    1E6 bytes       9.8 Kb/sec    [1]
NRTEXT         63.00       2:39    1E6 bytes       6.3 Kb/sec    [1]
PLOTS          n/a         0:25    10 plots        2.5 sec/PROW
2USER          n/a         0:53    10 plots        5.3 sec/PROW
4USER          n/a         1:59    10 plots        11.9 sec/PROW
.fi


Notes:
.ls [1]
The remote node for network benchmarks was aquila, a VAX 11/750 running
4.3BSD UNIX. Connection made via TCP/IP.
.le
.ls [2]
The interactive response of this system seemed to decrease markedly when it
was converted to 4.X VMS and is currently pretty marginal, even on a single
user 11/750. In interactive applications which make frequent system calls the
system tends to spend much of the available cpu time in kernel mode even if
there are only a few active users.
.le
.ls [3]
Compare the 2USER and 4USER timings with those for the UNIX 11/750. This
benchmark is characteristic of the two systems. No page faulting was evident
on the VMS 11/750 during the multiuser benchmarks. It took much longer to
run the 4USER benchmark on the VMS 750, as the set up time was much longer
once one or two other PLOTS jobs were running. The UNIX machine, on the other
hand, seemed almost as fast (or as slow) as usual, even with the PLOTS jobs
running on the other terminals.
.le
.ls [4]
The high value of the IPC bandwidth for VMS is due to the use of shared
memory. Mailboxes were considerably slower and are no longer used.
.le
.ls [5]
The foreign task interface uses mailboxes to talk to a DCL run as a subprocess
and should be considerably faster than it is. It is slow at present due to
the need to call SET MESSAGE before and after the user command to disable
pointless DCL error messages having to do with logical names.
+.le + +.bp +.sh +AOSVS/IRAF V2.5, AOSVS 7.54, Data General MV 10000 (solpl) +.br +24Mb, 2-600 Mb ARGUS disks and 2-600 Mb KISMET disks +.br +17 April 1987, Skip Schaller, Steward Observatory, University of Arizona + +.nf +\fBBenchmark CPU CLK Size Notes\fR + (sec) (m:ss) +CLSS 2.1 0:14 [1] +MKPKGV 9.6 0:29 +MKPKGC n/a 3:43 +MKHDB 6.4 0:25 +IMADDS 1.5 0:06 512x512x16 +IMADDR 1.6 0:08 512x512x32 +IMSTATR 4.8 0:07 512x512x32 +IMSHIFTR 39.3 0:47 512x512x32 +IMLOAD 3.1 0:08 512x512x16 [2] +IMLOADF 0.8 0:06 512x512x16 [2] +IMTRAN 2.9 0:06 512x512x16 +SUBPR n/a 0:36 10 conn/discon 3.6 sec/proc +IPCO 0.4 0:03 100 getpars +IPCB 0.9 0:07 1E6 bytes 142.9 Kb/sec +FORTSK n/a 0:17 10 commands 1.7 sec/cmd +WBIN 1.7 0:56 5E6 bytes 89.3 Kb/sec [3] +RBIN 1.7 0:25 5E6 bytes 200.0 Kb/sec [3] +RRBIN 0.5 0:27 5E6 bytes 185.2 Kb/sec [3] +WTEXT 12.7 0:25 1E6 bytes 40.0 Kb/sec [3] +RTEXT 8.4 0:13 1E6 bytes 76.9 Kb/sec [3] +CSTC 0.0 0:00 5E6 bytes [4] +WSTC 1.9 0:11 5E6 bytes 454.5 Kb/sec +RSTC 1.5 0:11 5E6 bytes 454.5 Kb/sec +RRSTC 0.1 0:10 5E6 bytes 500.0 Kb/sec +NWBIN 2.0 1:17 5E6 bytes 64.9 Kb/sec [5] +NRBIN 2.1 2:34 5E6 bytes 32.5 Kb/sec +NWNULL 2.0 1:15 5E6 bytes 66.7 Kb/sec +NWTEXT 15.1 0:41 1E6 bytes 24.4 Kb/sec +NRTEXT 8.7 0:55 1E6 bytes 18.2 Kb/sec +PLOTS n/a 0:09 10 plots 0.9 sec/PROW +2USER n/a 0:12 +4USER n/a 0:20 +.fi + + +Notes: +.ls [1] +The CLSS given is for a single user on the system. With one user already +logged into IRAF, the CLSS was 0:10. +.le +.ls [2] +These benchmarks were measured on the CTI system, an almost identically +configured MV/10000, with an IIS Model 75. +.le +.ls [3] +I/O throughput depends heavily on the element size of an AOSVS file. For +small element sizes, the throughput is roughly proportional to the element +size. I/O throughput in general could improve when IRAF file i/o starts +using double buffering and starts taking advantage of the asynchronous +definition of the kernel i/o drivers. +.le +.ls [4] +These static file benchmarks are not yet official IRAF benchmarks, but are +analogous to the binary file benchmarks. Since they use the supposedly +more efficient static file driver, they should give a better representation +of the true I/O throughput of the system. Since these are the drivers used +for image I/O, they represent the I/O throughput for the bulk image files. +.le +.ls [5] +The remote node used for the network tests was taurus, a SUN 3-160 +running SUN/UNIX 3.2. The network protocol used was TCP/IP. 
+.le + +.bp +.sh +AOSVS/IRAF V2.5, Data General MV 8000 (CTIO La Serena system) +.br +5Mb memory (?), 2 large DG disks plus 2 small Winchesters [1] +.br +17 April 1987, Doug Tody, NOAO/Tucson + +.nf +\fBBenchmark CPU CLK Size Notes\fR + (sec) (m:ss) +CLSS n/a 0:28 [2] +MKPKGV n/a 2:17 +MKPKGC n/a 6:38 +MKHDB 13.1 0:57 +IMADDS 2.9 0:12 512x512x16 +IMADDR 3.1 0:17 512x512x32 +IMSTATR 9.9 0:13 512x512x32 +IMSHIFTR 77.7 1:31 512x512x32 +IMLOAD n/a +IMLOADF n/a +IMTRAN 5.69 0:12 512x512x16 +SUBPR n/a 1:01 10 conn/discon 6.1 sec/proc +IPCO 0.6 0:04 100 getpars +IPCB 2.1 0:13 1E6 bytes 76.9 Kb/sec +FORTSK n/a 0:31 10 commands 3.1 sec/cmd +WBIN 5.0 2:41 5E6 bytes 31.1 Kb/sec +RBIN 2.4 0:25 5E6 bytes 200.0 Kb/sec +RRBIN 0.8 0:28 5E6 bytes 178.6 Kb/sec +WTEXT 24.75 0:57 1E6 bytes 17.5 Kb/sec +RTEXT 23.92 0:30 1E6 bytes 33.3 Kb/sec +NWBIN n/a +NRBIN n/a +NWNULL n/a +NWTEXT n/a +NRTEXT n/a +PLOTS n/a 0:16 10 plots 1.6 sec/PROW +2USER n/a 0:24 10 plots 2.4 sec/PROW +4USER +.fi + + +Notes: +.ls [1] +These benchmarks were run with the disks very nearly full and badly +fragmented, hence the i/o performance of the system was much worse than it +might otherwise be. +.le +.ls [2] +The CLSS given is for a single user on the system. With one user already +logged into IRAF, the CLSS was 0:18. +.le + +.bp + . +.sp 20 +.ce +APPENDIX 2. IRAF VERSION 2.2 BENCHMARKS +.ce +March 1986 + +.bp +.sh +UNIX/IRAF V2.2 4.2BSD UNIX, VAX 11/750+FPA RA81 (lyra) +.br +CPU times are given in seconds, CLK times in minutes and seconds. +.br +Saturday, 22 March, D. Tody, NOAO/Tucson + +.nf +\fBBenchmark CPU CLK Size Notes \fR + (user+sys) (m:ss) + +CLSS 06.8+04.0 0:13 +MKPKGV 24.5+26.0 1:11 +MKPKGC 160.5+67.4 4:33 +MKHDB 25.1+? 0:41 +IMADDS 3.3+? 0:08 512x512x16 +IMADDR 4.4 0:15 512x512x32 +IMSTATR 23.6 0:29 512x512x32 +IMSHIFTR 116.3 2:14 512x512x32 +IMLOAD 9.6 0:15 512x512x16 +IMLOADF 3.9 0:08 512x512x16 +IMTRAN 9.8 0:16 512x512x16 +SUBPR - 0:28 10 conn/discon 2.8 sec/proc +IPCO 1.3 0:08 100 getpars +IPCB 2.5 0:16 1E6 bytes 62.5 Kb/sec +FORTSK 4.4 0:22 10 commands 2.2 sec/cmd +WBIN 4.8 0:23 5E6 bytes 217.4 Kb/sec +RBIN 4.4 0:22 5E6 bytes 227.3 Kb/sec +RRBIN 0.2 0:20 5E6 bytes 250.0 Kb/sec +WTEXT 37.2 0:43 1E6 bytes 23.2 Kb/sec +RTEXT 32.2 0:37 1E6 bytes 27.2 Kb/sec +NWBIN 5.1 2:01 5E6 bytes 41.3 Kb/sec +NRBIN 8.3 2:13 5E6 bytes 37.6 Kb/sec +NWNULL 5.1 1:55 5E6 bytes 43.5 Kb/sec +NWTEXT 40.5 1:15 1E6 bytes 13.3 Kb/sec +NRTEXT 24.8 2:15 1E6 bytes 7.4 Kb/sec +PLOTS - 0:25 10 plots 2.5 clk/PROW +2USER - 0:43 +4USER - 1:24 +.fi + + +Notes: +.ls [1] +All cpu timings from MKHDB on do not include the "system" time. +.le +.ls [2] +4.3BSD UNIX, due out shortly, reportedly differs from 4.2 mostly in that +a number of efficiency improvements have been made. These benchmarks will +be rerun as soon as 4.3BSD becomes available. +.le +.ls [3] +In UNIX/IRAF V2.2, IPC communications are implemented with pipes which +are really sockets (a much more sophisticated mechanism than we need), +which accounts for the relatively low IPC bandwidth. +.le +.ls [4] +The remote node used for the network tests was aquila, a VAX 11/750 running +4.2 BSD UNIX. The network protocol used was TCP/IP. +.le +.ls [5] +The i/o bandwidth to disk should be improved dramatically when we implement +the planned "static file driver" for UNIX. This will provide direct, +asynchronous i/o for large preallocated binary files which do not change +in size after creation. 
The use of the global buffer cache by the UNIX +read and write system services is the one major shortcoming of the UNIX +system for image processing applications. +.le + +.bp +.sh +VMS/IRAF V2.2, VMS V4.3, VAX 11/750+FPA RA81/Clustered (vela) +.br +Wednesday, 26 March, D. Tody, NOAO/Tucson + +.nf +\fBBenchmark CPU CLK Size Notes \fR + (user+sys) (m:ss) + +CLSS 14.4 0:40 +MKPKGV 260.0 6:05 +MKPKGC - 4:51 +MKHDB 40.9 1:05 +IMADDS 6.4 0:10 512x512x16 +IMADDR 6.5 0:13 512x512x32 +IMSTATR 15.8 0:18 512x512x32 +IMSHIFTR 68.2 1:17 512x512x32 +IMLOAD 10.6 0:15 512x512x16 +IMLOADF 4.1 0:07 512x512x16 +IMTRAN 14.4 0:20 512x512x16 +SUBPR - 1:03 10 conn/discon 6 sec/subpr +IPCO 1.4 0:06 100 getpars +IPCB 2.8 0:07 1E6 bytes 143 Kb/sec +FORTSK - 0:35 10 commands 3.5 sec/cmd +WBIN (ra81)Cl 6.7 0:20 5E6 bytes 250 Kb/sec +RBIN (ra81)Cl 5.1 0:12 5E6 bytes 417 Kb/sec +RRBIN (ra81)Cl 1.8 0:10 5E6 bytes 500 Kb/sec +WBIN (rm80) 6.8 0:17 5E6 bytes 294 Kb/sec +RBIN (rm80) 5.1 0:13 5E6 bytes 385 Kb/sec +RRBIN (rm80) 1.8 0:09 5E6 bytes 556 Kb/sec +WTEXT 65.6 1:19 1E6 bytes 13 Kb/sec +RTEXT 32.5 0:34 1E6 bytes 29 Kb/sec +NWBIN (not available) +NRBIN (not available) +NWNULL (not available) +NWTEXT (not available) +NRTEXT (not available) +PLOTS - 0:24 10 plots +2USER - 0:43 +4USER - 2:13 response was somewhat erratic +.fi + + +Notes: + +.ls [1] +The interactive response of this system seemed to decrease markedly either +when it was converted to 4.x VMS or when it was clustered with our 8600. +In interactive applications which involve a lot of process spawns and other +system calls, the system tends to spend about half of the available cpu time +in kernel mode even if there are only a few active users. These problems +are much less noticeable on an 8600 or even on a 780, hence one wonders if +VMS has perhaps become too large and complicated for the relatively slow 11/750, +at least when used in a VAX-cluster configuration. +.le +.ls [2] +Compare the 2USER and 4USER timings with those for the UNIX 11/750. This +benchmark is characteristic of the two systems. No page faulting was evident +on the VMS 11/750 during the multiuser benchmarks. It took much longer to +run the 4USER benchmark on the VMS 750, as the set up time was much longer +once one or two other PLOTS jobs were running. The UNIX machine, on the other +hand, seemed almost as fast (or as slow) as usual, even with the PLOTS jobs +running on the other terminals. +.le +.ls [3] +The RA81 was clustered with the 8600, whereas the RM80 was directly connected +to the 11/750. +.le +.ls [4] +The high value of the IPC bandwidth for VMS is due to the use of shared +memory. Mailboxes were considerably slower and are no longer used. +.le +.ls [5] +The foreign task interface uses mailboxes to talk to a DCL run as a subprocess +and should be considerably faster than it is. It is slow at present due to +the need to call SET MESSAGE before and after the user command to disable +pointless DCL error messages having to do with logical names. +.le + +.bp +.sh +VMS/IRAF V2.2, VMS V4.3, VAX 8600 RA81/Clustered (draco) +.br +Saturday, 22 March, D. 
Tody, NOAO/Tucson + +.nf +\fBBenchmark CPU CLK Size Notes \fR + (user+sys) (m:ss) + +CLSS 2.4 0:08 +MKPKGV 48.0 1:55 +MKPKGC - 1:30 +MKHDB 7.1 0:21 +IMADDS 1.2 0:04 512x512x16 +IMADDR 1.5 0:08 512x512x32 +IMSTATR 3.0 0:05 512x512x32 +IMSHIFTR 13.6 0:20 512x512x32 +IMLOAD 2.8 0:07 512x512x16 via TCP/IP to lyra +IMLOADF 1.3 0:07 512x512x16 via TCP/IP to lyra +IMTRAN 3.2 0:07 512x512x16 +SUBPR - 0:26 10 conn/discon 2.6 sec/proc +IPCO 0.0 0:02 100 getpars +IPCB 0.3 0:07 1E6 bytes 142.9 Kb/sec +FORTSK - 0:13 10 commands 1.3 sec/cmd +WBIN (RA81)Cl 1.3 0:13 5E6 bytes 384.6 Kb/sec +RBIN (RA81)Cl 1.1 0:08 5E6 bytes 625.0 Kb/sec +RRBIN (RA81)Cl 0.3 0:07 5E6 bytes 714.0 Kb/sec +WTEXT 10.7 0:20 1E6 bytes 50.0 Kb/sec +RTEXT 5.2 0:05 1E6 bytes 200.0 Kb/sec +NWBIN 1.8 1:36 5E6 bytes 52.1 Kb/sec +NRBIN 8.0 2:06 5E6 bytes 39.7 Kb/sec +NWNULL 2.5 1:20 5E6 bytes 62.5 Kb/sec +NWTEXT 6.5 0:43 1E6 bytes 23.3 Kb/sec +NRTEXT 5.9 1:39 1E6 bytes 10.1 Kb/sec +PLOTS - 0:06 10 plots 0.6 sec/PROW +2USER - 0:08 +4USER - 0:14 +.fi + + +Notes: + +.ls [1] +Installed images were not used for these benchmarks; the CLSS timing +should be slightly improved if the CL image is installed. +.le +.ls [2] +The image display was accessed via the network (IRAF TCP/IP network interface, +Wollongong TCP/IP package for VMS), with the IIS image display residing on +node lyra and accessed via a UNIX/IRAF kernel server. The binary and text +file network tests also used lyra as the remote node. +.le +.ls [3] +The high value of the IPC bandwidth for VMS is due to the use of shared +memory. Mailboxes were considerably slower and are no longer used. +.le +.ls [4] +The foreign task interface uses mailboxes to talk to a DCL run as a +subprocess and should be considerably faster than it is. It is slow at +present due to the need to call SET MESSAGE before and after the user +command to disable pointless DCL error messages having to do with +logical names. +.le +.ls [5] +The cpu on the 8600 is so fast, compared to the fairly standard VAX i/o +channels, that most tasks are i/o bound. The system can therefore easily +support several heavy users before much degradation in performance is seen +(provided they access data stored on different disks to avoid a disk seek +bottleneck). This is borne out in the 2USER and 4USER benchmarks shown above. +The cpu did not become saturated until the fourth user was added in this +particular benchmark. +.le diff --git a/pkg/bench/bench.ms b/pkg/bench/bench.ms new file mode 100644 index 00000000..1dc6ebf7 --- /dev/null +++ b/pkg/bench/bench.ms @@ -0,0 +1,788 @@ +.RP +.TL +A Set of Benchmarks for Measuring IRAF System Performance +.AU +Doug Tody +.AI +.K2 "" "" "*" +March 1986 +.br +(Revised July 1987) + +.AB +.ti 0.75i +This paper presents a set of benchmarks for measuring the performance of +IRAF as installed on a particular host system. The benchmarks serve two +purposes: [1] they provide an objective means of comparing the performance of +different IRAF host systems, and [2] the benchmarks may be repeated as part of +the IRAF installation procedure to verify that the expected performance is +actually being achieved. While the benchmarks chosen are sometimes complex, +i.e., at the level of actual applications programs and therefore difficult to +interpret in detail, some effort has been made to measure all the important +performance characteristics of the host system. 
These include the raw cpu +speed, the floating point processing speed, the i/o bandwidth to disk, and a +number of characteristics of the host operating system as well, e.g., the +efficiency of common system calls, the interactive response of the system, +and the response of the system to loading. The benchmarks are discussed in +detail along with instructions for benchmarking a new system, followed by +tabulated results of the benchmarks for a number of IRAF host machines. +.AE + +.pn 1 +.bp +.ce +\fBContents\fR +.sp 3 +.sp +1.\h'|0.4i'\fBIntroduction\fP\l'|5.6i.'\0\01 +.sp +2.\h'|0.4i'\fBWhat is Measured\fP\l'|5.6i.'\0\02 +.sp +3.\h'|0.4i'\fBThe Benchmarks\fP\l'|5.6i.'\0\03 +.br +\h'|0.4i'3.1.\h'|0.9i'Host Level Benchmarks\l'|5.6i.'\0\03 +.br +\h'|0.9i'3.1.1.\h'|1.5i'CL Startup/Shutdown [CLSS]\l'|5.6i.'\0\03 +.br +\h'|0.9i'3.1.2.\h'|1.5i'Mkpkg (verify) [MKPKGV]\l'|5.6i.'\0\04 +.br +\h'|0.9i'3.1.3.\h'|1.5i'Mkpkg (compile) [MKPKGC]\l'|5.6i.'\0\04 +.br +\h'|0.4i'3.2.\h'|0.9i'IRAF Applications Benchmarks\l'|5.6i.'\0\04 +.br +\h'|0.9i'3.2.1.\h'|1.5i'Mkhelpdb [MKHDB]\l'|5.6i.'\0\05 +.br +\h'|0.9i'3.2.2.\h'|1.5i'Sequential Image Operators [IMADD, IMSTAT, etc.]\l'|5.6i.'\0\05 +.br +\h'|0.9i'3.2.3.\h'|1.5i'Image Load [IMLOAD,IMLOADF]\l'|5.6i.'\0\05 +.br +\h'|0.9i'3.2.4.\h'|1.5i'Image Transpose [IMTRAN]\l'|5.6i.'\0\06 +.br +\h'|0.4i'3.3.\h'|0.9i'Specialized Benchmarks\l'|5.6i.'\0\06 +.br +\h'|0.9i'3.3.1.\h'|1.5i'Subprocess Connect/Disconnect [SUBPR]\l'|5.6i.'\0\07 +.br +\h'|0.9i'3.3.2.\h'|1.5i'IPC Overhead [IPCO]\l'|5.6i.'\0\07 +.br +\h'|0.9i'3.3.3.\h'|1.5i'IPC Bandwidth [IPCB]\l'|5.6i.'\0\07 +.br +\h'|0.9i'3.3.4.\h'|1.5i'Foreign Task Execution [FORTSK]\l'|5.6i.'\0\07 +.br +\h'|0.9i'3.3.5.\h'|1.5i'Binary File I/O [WBIN,RBIN,RRBIN]\l'|5.6i.'\0\07 +.br +\h'|0.9i'3.3.6.\h'|1.5i'Text File I/O [WTEXT,RTEXT]\l'|5.6i.'\0\08 +.br +\h'|0.9i'3.3.7.\h'|1.5i'Network I/O [NWBIN,NRBIN,etc.]\l'|5.6i.'\0\08 +.br +\h'|0.9i'3.3.8.\h'|1.5i'Task, IMIO, GIO Overhead [PLOTS]\l'|5.6i.'\0\09 +.br +\h'|0.9i'3.3.9.\h'|1.5i'System Loading [2USER,4USER]\l'|5.6i.'\0\09 +.sp +4.\h'|0.4i'\fBInterpreting the Benchmark Results\fP\l'|5.6i.'\0\010 +.sp +\fBAppendix A: IRAF Version 2.5 Benchmarks\fP +.sp +\fBAppendix B: IRAF Version 2.2 Benchmarks\fP + +.nr PN 0 +.bp +.NH +Introduction +.PP +This set of benchmarks has been prepared with a number of purposes in mind. +Firstly, the benchmarks may be run after installing IRAF on a new system to +verify that the performance expected for that machine is actually being +achieved. In general, this cannot be taken for granted since the performance +actually achieved on a particular system may depend upon how the system +is configured and tuned. Secondly, the benchmarks may be run to compare +the performance of different IRAF hosts, or to track the system performance +over a period of time as improvements are made, both to IRAF and to the host +system. Lastly, the benchmarks provide a metric which can be used to tune +the host system. +.PP +All too often, the only benchmarks run on a system are those which test the +execution time of optimized code generated by the host Fortran compiler. +This is primarily a hardware benchmark and secondarily a test of the Fortran +optimizer. An example of this type of test is the famous Linpack benchmark. +.PP +The numerical execution speed test is an important benchmark but it tests only +one of the many factors contributing to the overall performance of the system +as perceived by the user. 
+In interactive use other factors are often more
+important, e.g., the time required to spawn or communicate with a subprocess,
+the time required to access a file, the response of the system as the number
+of users (or processes) increases, and so on.  While the quality of optimized
+code is significant for cpu intensive batch processing, other factors are
+often more important for sophisticated interactive applications.
+.PP
+The benchmarks described here are designed to test, as fully as possible,
+the major factors contributing to the overall performance of the IRAF system
+on a particular host.  A major factor in the timings of each benchmark is
+of course the IRAF system itself, but comparisons of different hosts are
+nonetheless possible since the code is virtually identical on all hosts
+(the applications and VOS are in fact identical on all hosts).
+The IRAF kernel (OS interface) is coded differently for each host operating
+system, but the functions performed by the kernel are identical on each host,
+and since the kernel is a very "thin" layer the kernel code itself is almost
+always a negligible factor in the final timings.
+.PP
+The IRAF version number, host operating system and associated version number,
+and the host computer hardware configuration are all important in interpreting
+the results of the benchmarks, and should always be recorded.
+
+.NH
+What is Measured
+.PP
+Each benchmark measures two quantities, the total cpu time required to
+execute the benchmark, and the total (wall) clock time required to execute the
+benchmark.  If the clock time measurement is to be of any value the benchmarks
+must be run on a single user system.  Given this "best time" measurement
+and some idea of how the system responds to loading, it is not difficult to
+estimate the performance to be expected on a loaded system.
+.PP
+The total cpu time required to execute a benchmark consists of the "user" time
+plus the "system" time.  The "user" time is the cpu time spent executing
+the instructions comprising the user (IRAF) program, i.e., any instructions
+in procedures linked directly into the process being executed.  The "system"
+time is the cpu time spent in kernel mode executing the system services called
+by the user program.  On some systems there is no distinction between the two
+types of timings, with the system time either being included in the measured
+cpu time, or omitted from the timings.  If the benchmark involves several
+concurrent processes, on some systems it may not be possible to measure the
+cpu time used by the subprocesses at all.
+.PP
+When possible we give both measurements, while in some cases only the user
+time is given, or only the sum of the user and system times.  The cpu time
+measurements are therefore only directly comparable between different
+operating systems for the simpler benchmarks, in particular those which make
+few system calls.  The cpu measurements given \fIare\fR accurate for the same
+operating system (e.g., some version of UNIX) running on different hosts,
+and may be used to compare such systems.  Reliable comparisons between
+different operating systems are also possible, but only if one thoroughly
+understands what is going on.
+.PP
+The clock time measurement includes both the user and system times, plus the
+time spent waiting for i/o.  Any minor system daemon processes executing while
+the benchmarks are being run may bias the clock time measurement slightly,
+but since these are a constant part of the host environment it is fair to
+include them in the timings.  If a major system daemon which runs only
+infrequently (e.g., the print symbiont in VMS) happens to run while a
+benchmark is in progress, the benchmark should be considered invalid.
+.PP
+Assuming an otherwise idle system, a comparison of the cpu and clock times
+tells whether the benchmark was cpu bound or i/o bound.
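+For example (the figures here are purely illustrative), a task which
+consumes 4.4 seconds of user time and 0.6 seconds of system time, but which
+requires 22 seconds of clock time to complete, spent roughly 17 seconds
+waiting for i/o and was clearly i/o bound; had the same task finished in
+5.5 seconds of clock time it would have been cpu bound.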
+Those benchmarks
+involving compiled IRAF tasks do not include the process startup and pagein
+times (these are measured by a different benchmark), hence the task should be
+run once before running the benchmark to connect the subprocess and page in
+the memory used by the task.  A good procedure to follow is to run each
+benchmark once to start the process, and then repeat the benchmark three
+times, averaging the results.  If inconsistent results are obtained, further
+iterations and/or monitoring of the host system are called for until a
+consistent result is achieved.
+.PP
+Many benchmarks depend upon disk performance as well as compute cycles.
+For such a benchmark to be a meaningful measure of the i/o bandwidth of the
+system it is essential that no other users (or batch jobs) be competing for
+disk seeks on the disk used for the test file.  There are subtle things to
+watch out for in this regard: for example, if the machine is in a VMS cluster
+or on a local area network, processes on other nodes may be accessing the
+local disk, yet will not show up in a user login or process list on the local
+node.  It is always desirable to repeat each test several times or on several
+different disk devices, to ensure that no outside requests were being serviced
+while the benchmark was being run.  If the system has disk monitoring
+utilities, use these to find an idle disk before running any benchmarks which
+do heavy i/o.
+.PP
+Beware of disks which are nearly full; the maximum achievable i/o bandwidth
+may fall off rapidly as a disk fills up, due to disk fragmentation (the file
+must be stored in little pieces scattered all over the physical disk).
+Similarly, many systems (VMS, AOS/VS, V7 and Sys V UNIX, but not Berkeley UNIX)
+suffer from disk fragmentation problems that gradually worsen as a files system
+ages, requiring that the disk periodically be backed off onto tape and then
+restored to render the files and free spaces as contiguous as possible.
+In some cases, disk fragmentation can cause the maximum achievable i/o
+bandwidth to degrade by an order of magnitude.  For example, on a VMS system
+one can use \fLCOPY/CONTIGUOUS\fR to render files contiguous (e.g., this can
+be done on all the executables in \fL[IRAF.BIN]\fR after installing the
+system, to speed process pagein times).  If the copy fails for a large file
+even though there is substantial free space left on the disk, the disk is
+badly fragmented.
+
+.NH
+The Benchmarks
+.PP
+Instructions are given for running each benchmark, and the operations
+performed by each benchmark are briefly described.  The system characteristics
+measured by the benchmark are briefly discussed.  A short mnemonic name is
+associated with each benchmark to identify it in the tables given in the
+appendices, tabulating the results for actual host machines.
+
+.NH 2
+Host Level Benchmarks
+.PP
+The benchmarks discussed in this section are run at the host system level.
+The examples are given for the UNIX cshell, under the assumption that a host
+dependent example is better than none at all.  These commands must be
+translated by the user to run the benchmarks on a different system
+(hint: use \fLSHOW STATUS\fR or a stop watch to measure wall clock times
+on a VMS host).
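+.PP
+As an illustration, the run-and-average procedure recommended in the
+previous section might be carried out for a host level benchmark under the
+UNIX cshell as follows (the MKPKGV benchmark of section 3.1.2 below is used
+here purely as an example):
+.DS
+\fL% cd $iraf/pkg
+% time mkpkg -n		# first run; discard this timing
+% time mkpkg -n		# repeat three times, averaging the
+% time mkpkg -n		# three timings to obtain the
+% time mkpkg -n		# final result\fR
+.DE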
+.NH 3
+CL Startup/Shutdown [CLSS]
+.PP
+Go to the CL login directory (any directory containing a \fLLOGIN.CL\fR file),
+mark the time (the method by which this is done is system dependent),
+and start up the CL.  Enter the "logout" command while the CL is starting up
+so that the CL will not be idle (with the clock running) while the command
+is being entered.  Mark the final cpu and clock time and compute the
+difference.
+.DS
+\fL% time cl
+logout\fR
+.DE
+.LP
+This is a complex benchmark but one which is of obvious importance to the
+IRAF user.  The benchmark is probably dominated by the cpu time required to
+start up the CL, i.e., start up the CL process, initialize the i/o system,
+initialize the environment, interpret the CL startup file, interpret the
+user LOGIN.CL file, connect and disconnect the x_system.e subprocess, and so on.
+Most of the remaining time is the overhead of the host operating system for
+the process spawns, page faults, file accesses, and so on.
+\fIDo not use a customized \fLLOGIN.CL\fP file when running this benchmark\fR,
+or the timings will almost certainly be affected.
+.NH 3
+Mkpkg (verify) [MKPKGV]
+.PP
+Go to the PKG directory and enter the (host system equivalent of the)
+following command.  The method by which the total cpu and clock times are
+computed is system dependent.
+.DS
+\fL% cd $iraf/pkg
+% time mkpkg -n\fR
+.DE
+.LP
+This benchmark does a "no execute" make-package of the entire PKG suite of
+applications and systems packages.  This tests primarily the speed with which
+the host system can read directories, resolve pathnames, and return directory
+information for files.  Since the PKG directory tree is continually growing,
+this benchmark is only useful for comparing the same version of IRAF run on
+different hosts, or the same version of IRAF on the same host at different
+times.
+.NH 3
+Mkpkg (compile) [MKPKGC]
+.PP
+Go to the directory "iraf$pkg/bench/xctest" and enter the (host system
+equivalents of the) following commands.  The method by which the total cpu
+and clock times are computed is system dependent.  Only the \fBmkpkg\fR
+command should be timed.
+.DS
+\fL
+% cd $iraf/pkg/bench/xctest
+% mkpkg clean		# delete old library, etc., if present
+% time mkpkg
+% mkpkg clean		# delete newly created binaries\fR
+.DE
+.LP
+This tests the time required to compile and link a small IRAF package.
+The timings reflect the time required to preprocess, compile, optimize,
+and assemble each module and insert it into the package library, then link
+the package executable.  The host operating system overhead for the process
+spawns, page faults, etc., is also a major factor.  If the host system
+provides a shared library facility this will significantly affect the link
+time, hence the benchmark should be run linking both with and without shared
+libraries to make a fair comparison to other systems.  Linking against a
+large library is fastest if the library is topologically sorted and stored
+contiguously on disk.
+
+.NH 2
+IRAF Applications Benchmarks
+.PP
+The benchmarks discussed in this section are run from within the IRAF
+environment, using only standard IRAF applications tasks.  The cpu and clock
+times of any (compiled) IRAF task may be measured by prefixing the task name
+with a $ when the command is entered into the CL, as shown in the examples.
+The significance of the cpu time measurement is not precisely defined for
+all systems.  On a UNIX host, it is the "user" cpu time used by the task.
+On a VMS host, there does not appear to be any distinction between the user
+and system times (probably because the system services execute in the context
+of the calling process), hence the cpu time given probably includes both,
+but probably excludes the time for any services executing in ancillary
+processes, e.g., for RMS.
+.NH 3
+Mkhelpdb [MKHDB]
+.PP
+The \fBmkhelpdb\fR task is in the \fBsoftools\fR package.  The function of
+the task is to scan the tree of ".hd" help-directory files and compile the
+binary help database.
+.DS
+\fLcl> softools
+cl> $mkhelpdb\fR
+.DE
+.LP
+This benchmark tests the speed of the host files system and the efficiency of
+the host system services and text file i/o, as well as the global optimization
+of the Fortran compiler and the MIPS rating of the host machine.
+Since the size of the help database varies with each version of IRAF,
+this benchmark is only useful for comparing the same version of IRAF run
+on different hosts, or the same version run on a single host at different
+times.  Note that any additions to the base IRAF system (e.g., SDAS) will
+increase the size of the help database and affect the timings.
+.NH 3
+Sequential Image Operators [IMADDS,IMADDR,IMSTATR,IMSHIFTR]
+.PP
+These benchmarks measure the time required by typical image operations.
+All tests should be performed on 512 square test images created with the
+\fBimdebug\fR package.  The \fBimages\fR and \fBimdebug\fR packages should
+be loaded.  Enter the following commands to create the test images.
+.DS
+\fLcl> mktest pix.s s 2 "512 512"
+cl> mktest pix.r r 2 "512 512"\fR
+.DE
+.LP
+The following benchmarks should be run on these test images.  Delete the
+output images after each benchmark is run.  Once a command has been entered
+as shown, it can be repeated by typing \fL^\fR followed by return.
+Each benchmark should be run several times, discarding the first timing and
+averaging the remaining timings for the final result.
+.DS
+.TS
+l l.
+[IMADDS]	\fLcl> $imarith pix.s + 5 pix2.s; imdel pix2.s\fR
+[IMADDR]	\fLcl> $imarith pix.r + 5 pix2.r; imdel pix2.r\fR
+[IMSTATR]	\fLcl> $imstat pix.r\fR
+[IMSHIFTR]	\fLcl> $imshift pix.r pix2.r .33 .44 interp=spline3\fR
+.TE
+.DE
+.LP
+The IMADD benchmarks test the efficiency of the image i/o system, including
+binary file i/o, and provide an indication of how long a simple disk to disk
+image operation takes on the system in question.  This benchmark should be
+i/o bound on most systems.  The IMSTATR and IMSHIFTR benchmarks are normally
+cpu bound, and test primarily the speed of the host cpu and floating point
+unit, and the quality of the code generated by the host Fortran compiler.
+Note that the IMSHIFTR benchmark employs a true two dimensional bicubic spline,
+hence the timings are a factor of 4 greater than one would expect if a one
+dimensional interpolator were used to shift the two dimensional image.
+.NH 3
+Image Load [IMLOAD,IMLOADF]
+.PP
+To run the image load benchmarks, first load the \fBtv\fR package and
+display something to get the x_display.e process into the process cache.
+Run the following two benchmarks, displaying the test image PIX.S (this image
+contains a test pattern of no interest).
+.DS
+.TS
+l l.
+[IMLOAD] \fLcl> $display pix.s 1\fR +[IMLOADF] \fLcl> $display pix.s 1 zt=none\fR +.TE +.DE +.LP +The IMLOAD benchmark measures how long it takes for a normal image load on +the host system, including the automatic determination of the greyscale +mapping, and the time required to map and clip the image pixels into the +8 bits (or whatever) displayable by the image display. This benchmark +measures primarily the cpu speed and i/o bandwidth of the host system. +The IMLOADF benchmark eliminates the cpu intensive greyscale transformation, +yielding the minimum image display time for the host system. +.NH 3 +Image Transpose [IMTRAN] +.PP +To run this benchmark, transpose the image PIX.S, placing the output in a +new image. +.DS +\fLcl> $imtran pix.s pix2.s\fR +.DE +.LP +This benchmark tests the ability of a process to grab a large amount of +physical memory (large working set), and the speed with which the host system +can service random rather than sequential file access requests. The user +working set should be large enough to avoid excessive page faulting. + +.NH 2 +Specialized Benchmarks +.PP +The next few benchmarks are implemented as tasks in the \fBbench\fR package, +located in the directory "pkg$bench". This package is not installed as a +predefined package as the standard IRAF packages are. Since this package is +used infrequently the binaries may have been deleted; if the file x_bench.e is +not present in the \fIbench\fR directory, rebuild it as follows: +.DS +\fLcl> cd pkg$bench +cl> mkpkg\fR +.DE +.LP +To load the package, enter the following commands. It is not necessary to +\fIcd\fR to the bench directory to load or run the package. +.DS +\fLcl> task $bench = "pkg$bench/bench.cl" +cl> bench +.DE +.LP +This defines the following benchmark tasks. There are no manual pages for +these tasks; the only documentation is what you are reading. +.DS +.TS +l l. +FORTASK - foreign task execution +GETPAR - get parameter; tests IPC overhead +PLOTS - make line plots from an image +RBIN - read binary file; tests FIO bandwidth +RRBIN - raw (unbuffered) binary file read +RTEXT - read text file; tests text file i/o speed +SUBPROC - subprocess connect/disconnect +WBIN - write binary file; tests FIO bandwidth +WIPC - write to IPC; tests IPC bandwidth +WTEXT - write text file; tests text file i/o speed +.TE +.DE +.NH 3 +Subprocess Connect/Disconnect [SUBPR] +.PP +To run the SUBPR benchmark, enter the following command. +This will connect and disconnect the x_images.e subprocess 10 times. +Difference the starting and final times printed as the task output to get +the results of the benchmark. The cpu time measurement may be meaningless +(very small) on some systems. +.DS +\fLcl> subproc 10\fR +.DE +This benchmark measures the time required to connect and disconnect an +IRAF subprocess. This includes not only the host time required to spawn +and later shutdown a process, but also the time required by the IRAF VOS +to set up the IPC channels, initialize the VOS i/o system, initialize the +environment in the subprocess, and so on. A portion of the subprocess must +be paged into memory to execute all this initialization code. The host system +overhead to spawn a subprocess and fault in a portion of its address space +is a major factor in this benchmark. +.NH 3 +IPC Overhead [IPCO] +.PP +The \fBgetpar\fR task is a compiled task in x_bench.e. The task will +fetch the value of a CL parameter 100 times. 
+.DS +\fLcl> $getpar 100\fR +.DE +Since each parameter access consists of a request sent to the CL by the +subprocess, followed by a response from the CL process, with a negligible +amount of data being transferred in each call, this tests the IPC overhead. +.NH 3 +IPC Bandwidth [IPCB] +.PP +To run this benchmark enter the following command. The \fBwipc\fR task +is a compiled task in x_bench.e. +.DS +\fLcl> $wipc 1E6 > dev$null\fR +.DE +This writes approximately 1 Mb of binary data via IPC to the CL, which discards +the data (writes it to the null file via FIO). Since no actual disk file i/o is +involved, this tests the efficiency of the IRAF pseudofile i/o system and of the +host system IPC facility. +.NH 3 +Foreign Task Execution [FORTSK] +.PP +To run this benchmark enter the following command. The \fBfortask\fR +task is a CL script task in the \fBbench\fR package. +.DS +\fLcl> fortask 10\fR +.DE +This benchmark executes the standard IRAF foreign task \fBrmbin\fR (one of the +bootstrap utilities) 10 times. The task is called with no arguments and does +nothing other than execute, print out its "usage" message, and shut down. +This tests the time required to execute a host system task from within the +IRAF environment. Only the clock time measurement is meaningful. +.NH 3 +Binary File I/O [WBIN,RBIN,RRBIN] +.PP +To run these benchmarks, make sure the \fBbench\fR package is loaded, and enter +the following commands. The \fBwbin\fR, \fBrbin\fR and \fBrrbin\fR tasks are +compiled tasks in x_bench.e. A binary file named BINFILE is created in the +current directory by WBIN, and should be deleted after the benchmark has been +run. Each benchmark should be run at least twice before recording the time +and moving on to the next benchmark. Successive calls to WBIN will +automatically delete the file and write a new one. +.PP +\fINOTE:\fR it is wise to create the test file on a files system which has +a lot of free space available, to avoid disk fragmentation problems. +Also, if the host system has two or more different types of disk drives +(or disk controllers or bus types), you may wish to run the benchmark +separately for each drive. +.DS +\fLcl> $wbin binfile 5E6 +cl> $rbin binfile +cl> $rrbin binfile +cl> delete binfile # (not part of the benchmark)\fR +.DE +.LP +These benchmarks measure the time required to write and then read a binary disk +file approximately 5 Mb in size. This benchmark measures the binary file i/o +bandwidth of the FIO interface (for sequential i/o). In WBIN and RBIN the +common buffered READ and WRITE requests are used, hence some memory to memory +copying is included in the overhead measured by the benchmark. A large FIO +buffer is used to minimize disk seeks and synchronization delays; somewhat +faster timings might be possible by increasing the size of the buffer +(this is not a user controllable option, and is not possible on all host +systems). The RRBIN benchmark uses ZARDBF to read the file in chunks of +32768 bytes, giving an estimate of the maximum i/o bandwidth for the system. +.NH 3 +Text File I/O [WTEXT,RTEXT] +.PP +To run these benchmarks, load the \fBbench\fR package, and then enter the +following commands. The \fBwtext\fR and \fBrtext\fR tasks are compiled tasks +in x_bench.e. A text file named TEXTFILE is created in the current directory +by WTEXT, and should be deleted after the benchmarks have been run. +Successive calls to WTEXT will automatically delete the file and write a new +one. 
+.DS
+\fLcl> $wtext textfile 1E6
+cl> $rtext textfile
+cl> delete textfile		# (not part of the benchmark)\fR
+.DE
+.LP
+These benchmarks measure the time required to write and then read a text disk
+file approximately one megabyte in size (15,625 64 character lines).
+This benchmark measures the efficiency with which the system can sequentially
+read and write text files.  Since text file i/o requires the system to pack
+and unpack records, text i/o tends to be cpu bound.
+.NH 3
+Network I/O [NWBIN,NRBIN,NWNULL,NWTEXT,NRTEXT]
+.PP
+These benchmarks are equivalent to the binary and text file benchmarks
+just discussed, except that the binary and text files are accessed on a
+remote node via the IRAF network interface.  The calling sequences are
+identical except that an IRAF network filename is given instead of referencing
+a file in the current directory.  For example, the following commands would
+be entered to run the network binary file benchmarks on node LYRA (the node
+name and filename are site dependent).
+.DS
+\fLcl> $wbin lyra!/tmp3/binfile 5E6	\fR[NWBIN]\fL
+cl> $rbin lyra!/tmp3/binfile		\fR[NRBIN]\fL
+cl> $wbin lyra!/dev/null 5E6		\fR[NWNULL]\fL
+cl> delete lyra!/tmp3/binfile\fR
+.DE
+.LP
+The text file benchmarks are equivalent with the obvious changes, i.e.,
+substitute "text" for "bin", "textfile" for "binfile", and omit the null
+textfile benchmark.  The type of network interface used (TCP/IP, DECNET, etc.)
+and the characteristics of the remote node should be recorded.
+.PP
+These benchmarks test the bandwidth of the IRAF network interfaces for binary
+and text files, as well as the limiting speed of the network itself (NWNULL).
+The binary file benchmarks should be i/o bound.  NWBIN should outperform
+NRBIN since a network write is a pipelined operation, whereas a network read
+is (currently) a synchronous operation.  Text file access may be either cpu
+or i/o bound depending upon the relative speeds of the network and host cpus.
+The IRAF network interface buffers textfile i/o to minimize the number of
+network packets and maximize the i/o bandwidth.
+.NH 3
+Task, IMIO, GIO Overhead [PLOTS]
+.PP
+The \fBplots\fR task is a CL script task which calls the \fBprow\fR task
+repeatedly to plot the same line of an image.  The graphics output is
+discarded (directed to the null file) rather than plotted since otherwise
+the results of the benchmark would be dominated by the plotting speed of the
+graphics terminal.
+.DS
+\fLcl> plots pix.s 10\fR
+.DE
+This is a complex benchmark.  The benchmark measures the overhead of task
+(not process) execution and the overhead of the IMIO and GIO subsystems,
+as well as the speed with which IPC can be used to pass parameters to a task
+and return the GIO graphics metacode to the CL.
+.PP
+The \fBprow\fR task is all overhead and is not normally used to interactively
+plot image lines (\fBimplot\fR is what is normally used), but it is a good
+task to use for a benchmark since it exercises the subsystems most commonly
+used in scientific tasks.  The \fBprow\fR task has a couple dozen parameters
+(mostly hidden), must open the image to read the image line to be plotted
+on every call, and must open the GIO graphics device on every call as well.
+.NH 3
+System Loading [2USER,4USER]
+.PP
+This benchmark attempts to measure the response of the system as the
+load increases.  This is done by running large \fBplots\fR jobs on several
+terminals and then repeating the 10 plot \fBplots\fR benchmark.
+For example, to run the 2USER benchmark, log in on a second terminal and
+enter the following command, and then repeat the PLOTS benchmark discussed
+in the last section.  Be sure to use a different login or login directory
+for each "user", to avoid concurrency problems, e.g., when reading the
+input image or updating parameter files.
+.DS
+\fLcl> plots pix.s 9999\fR
+.DE
+Theoretically, the benchmark should run approximately .5 (2USER) and
+.25 (4USER) times as fast as when the PLOTS benchmark was run on a single
+user system, assuming that cpu time is the limiting resource and that a
+single job is cpu bound.
+In a case where there is more than one limiting resource, e.g., disk seeks as
+well as cpu cycles, performance will fall off more rapidly.  If, on the other
+hand, a single user process does not keep the system busy, e.g., because
+synchronous i/o is used, performance will fall off less rapidly.  If the
+system unexpectedly runs out of some critical system resource, e.g., physical
+memory or some internal OS buffer space, performance may be much worse than
+expected.
+.PP
+If the multiuser performance is poorer than expected it may be possible to
+improve the system performance significantly once the reason for the poor
+performance is understood.  If disk seeks are the problem it may be possible
+to distribute the load more evenly over the available disks.  If the
+performance decays linearly as more users are added and then gets really bad,
+it is probably because some critical system resource has run out.  Use the
+system monitoring tools provided with the host operating system to try to
+identify the critical resource.  It may be possible to modify the system
+tuning parameters to fix the problem, once the critical resource has been
+identified.
+
+.NH
+Interpreting the Benchmark Results
+.PP
+Many factors determine the timings obtained when the benchmarks are run
+on a system.  These factors include all of the following:
+.sp
+.RS
+.IP \(bu
+The hardware configuration, e.g., cpu used, clock speed, availability of
+floating point hardware, type of floating point hardware, amount of memory,
+number and type of disks, degree of fragmentation of the disks, bus bandwidth,
+disk controller bandwidth, memory controller bandwidth for memory mapped DMA
+transfers, and so on.
+.IP \(bu
+The host operating system, including the version number, tuning parameters,
+user quotas, working set size, files system parameters, Fortran compiler
+characteristics, level of optimization used to compile IRAF, and so on.
+.IP \(bu
+The version of IRAF being run.  On a VMS system, are the images "installed"
+to permit shared memory and reduce physical memory usage?  Were the programs
+compiled with the code optimizer, and if so, what compiler options were used?
+Are shared libraries used if available on the host system?
+.IP \(bu
+Other activity in the system when the benchmarks were run.  If there were no
+other users on the machine at the time, how about batch jobs?  If the machine
+is on a cluster or network, were other nodes accessing the same disks?
+How many other processes were running on the local node?  Ideally, the
+benchmarks should be run on an otherwise idle system, else the results may be
+meaningless or next to impossible to interpret.  Given some idea of how the
+host system responds to loading, it is possible to estimate how a timing
+will scale as the system is loaded, but the reverse operation is much more
+difficult.
+.RE
+.sp
+.PP
+Because so many factors contribute to the results of a benchmark, it can be
+difficult to draw firm conclusions from any benchmark, no matter how simple.
+The hardware and software in modern computer systems are so complicated that
+it is difficult even for an expert with a detailed knowledge and understanding
+of the full system to explain in detail where the time is going, even when
+running the simplest benchmark.  On some recent message based multiprocessor
+systems it is probably impossible to fully comprehend what is going on at any
+given time, even if one fully understands how the system works, because of the
+dynamic nature of such systems.
+.PP
+Despite these difficulties, the benchmarks do provide a coarse measure of the
+relative performance of different host systems, as well as some indication of
+the efficiency of the IRAF VOS.  The benchmarks are designed to measure the
+performance of the \fIhost system\fR (both hardware and software) in a number
+of important areas, all of which play a role in determining the suitability of
+a system for scientific data processing.  The benchmarks are \fInot\fR
+designed to measure the efficiency of the IRAF software itself (except parts
+of the VOS), e.g., there is no measure of the time taken by the CL to compile
+and execute a script, no measure of the speed of the median algorithm or of
+an image transpose, and so on.  These timings are also important, of course,
+but should be measured separately.  Also, measurements of the efficiency of
+individual applications programs are much less critical than the performance
+criteria dealt with here, since it is relatively easy to optimize an
+inefficient or poorly designed applications program, even a complex one like
+the CL, but there is generally little one can do about the host system.
+.PP
+The timings for the benchmarks for a number of host systems are given in the
+appendices which follow.  Sometimes there will be more than one set of
+benchmarks for a given host system, e.g., because the system provided two or
+more disks or floating point options with different levels of performance.
+The notes at the end of each set of benchmarks are intended to document any
+special features or problems of the host system which may have affected the
+results.  In general we did not bother to record things like system tuning
+parameters, working set, page faults, etc., unless these were considered an
+important factor in the benchmarks.  In particular, few IRAF programs page
+fault other than during process startup, hence this is rarely a significant
+factor when running these benchmarks (except possibly in IMTRAN).
+.PP
+Detailed results for each configuration of each host system are presented on
+separate pages in the Appendices.  A summary table showing the results of
+selected benchmarks for all host systems at once is also provided.
+The system characteristic or characteristics principally measured by each
+benchmark is noted in the table below.  This is only approximate, e.g., the
+MIPS rating is a significant factor in all but the most i/o bound benchmarks.
+.KS
+.TS
+center;
+ci ci ci ci ci
+l c c c c.
+benchmark responsiveness mips flops i/o
+
+CLSS \(bu
+MKPKGV \(bu
+MKHDB \(bu \(bu
+PLOTS \(bu \(bu
+IMADDS \(bu \(bu
+IMADDR \(bu \(bu
+IMSTATR \(bu
+IMSHIFTR \(bu
+IMTRAN \(bu
+WBIN \(bu
+RBIN \(bu
+.TE
+.KE
+.sp
+.PP
+By \fIresponsiveness\fR we refer to the interactive response of the system
+as perceived by the user.
+A system with a good interactive response will do
+all the little things very fast, e.g., directory listings, image header
+listings, plotting from an image, loading new packages, starting up a new
+process, and so on.  Machines which score high in this area will seem fast
+to the user, whereas machines which score poorly will \fIseem\fR slow,
+sometimes frustratingly slow, even though they may score high in the areas
+of floating point performance or i/o bandwidth.  The interactive response
+of a system obviously depends upon the MIPS rating of the system (see below),
+but an often more significant factor is the design and computational complexity
+of the host operating system itself, in particular the time taken by the host
+operating system to execute system calls.  Any system which spends a large
+fraction of its time in kernel mode will probably have poor interactive
+response.  The response of the system to loading is also very important,
+i.e., if the system has trouble with load balancing as the number of users
+(or processes) increases, response will become increasingly erratic until the
+interactive response is hopelessly poor.
+.PP
+The MIPS column refers to the raw speed of the system when executing arbitrary
+code containing a mixture of various types of instructions, but little floating
+point, i/o, or system calls.  A machine with a high MIPS rating will have a
+fast cpu, e.g., a fast clock rate, fast memory access time, large cache memory,
+and so on, as well as a good optimizing Fortran compiler.  Assuming good
+compilers, the MIPS rating is primarily a measure of the hardware speed of
+the host machine, but all of the MIPS related benchmarks presented here also
+make a significant number of system calls (MKHDB, for example, does a lot of
+file accesses and text file i/o), hence it is not that simple.  Perhaps a
+completely cpu bound pure-MIPS benchmark should be added to our suite of
+benchmarks (the MIPS rating of every machine is generally well known, however).
+.PP
+The FLOPS column identifies those benchmarks which do a significant amount of
+floating point computation.  The IMSHIFTR and IMSTATR benchmarks in particular
+are heavily into floating point.  These benchmarks measure the single
+precision floating point speed of the host system hardware, as well as the
+effectiveness of do-loop optimization by the host Fortran compiler.
+The degree of optimization provided by the Fortran compiler can affect the
+timing of these benchmarks by up to a factor of two.  Note that the sample is
+very small, and if a compiler fails to optimize the inner loop of one of these
+benchmark programs, the situation may be reversed when running some other
+benchmark.  Any reasonable Fortran compiler should be able to optimize the
+inner loop of the IMADDR benchmark, so the CPU timing for this benchmark is
+a good measure of the hardware floating point speed, if one allows for do-loop
+overhead, memory i/o, and the system calls necessary to access the image on
+disk.
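+For example, since IMADDR performs one single precision floating point
+addition per pixel of a 512x512 image, the 4.4 second cpu timing recorded
+for the 4.2BSD VAX 11/750+FPA in the Version 2.2 appendix corresponds to
+roughly 512*512/4.4, or 60 thousand floating point additions per second,
+with the do-loop, memory, and image access overhead included in that figure.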
+.PP
+The I/O column identifies those benchmarks which are i/o bound and which
+therefore provide some indication of the i/o bandwidth of the host system.
+The i/o bandwidth actually achieved in these benchmarks depends upon
+many factors, the most important of which are the host operating system
+software (files system data structures and i/o software, disk drivers, etc.)
+and the host system hardware, i.e., disk type, disk controller type, bus
+bandwidth, and DMA memory controller bandwidth.  Note that asynchronous i/o
+is not currently used in these benchmarks, hence higher transfer rates are
+probably possible in special cases (on a busy system all i/o is asynchronous
+at the host system level anyway).  Large transfers are used to minimize disk
+seeks and synchronization delays, hence the benchmarks should provide a good
+measure of the realistically achievable host i/o bandwidth.
diff --git a/pkg/bench/bench_tab.ms b/pkg/bench/bench_tab.ms
new file mode 100644
index 00000000..9245cbff
--- /dev/null
+++ b/pkg/bench/bench_tab.ms
@@ -0,0 +1,98 @@
+.LP
+.hm 0.25i
+.nr HM 0.25i
+.vs 10
+.nr VS 10
+.ll 9.0i
+.nr LL 9.0i
+.ps 9.0
+.nr PS 9.0
+.po 0.5i
+.nr PO 0.5i
+.bp
+.LP
+\fBIRAF V2.5 Table of Selected Benchmark Results		May 1987\fR
+.br
+CPU and/or clock times are tabulated below for selected benchmark tests.
+CPU times are given in seconds; clock times (in parentheses) are given
+as (m:ss).  For the WBIN and RBIN benchmarks, the tabulated result is
+the measured bandwidth in Kbytes/second.  For a description of the
+benchmark tests, see the document "A Set of Benchmarks for Measuring
+IRAF System Performance", Doug Tody, May 1987.
+.sp
+.TS
+cB cB cB cB s cB cB s cB s cB s cB s cB s cB cB
+cB cB cB cB s cB cB s cB s cB s cB s cB s cB cB
+lB |n| n| n n| n| n n| n n| n n| n n| n n| n| n|.
+ CLSS MKPKGV MKHDB PLOTS IMADDS IMADDR IMSTATR IMSHIFTR IMTRAN WBIN RBIN
+ _ _ _ _ _ _ _ _ _ _ _
+
+ISI (0\&:03) (0\&:25) 6\&.00 (0\&:17) (0\&:10) 0\&.89 (0\&:05) 3\&.82 (0\&:10) 7\&.77 (0\&:10) 81\&.60 (1\&:29) 1\&.62 (0\&:06) 294.1 277.8
+
+SUN3 (0\&:03) (0\&:17) 5\&.26 (0\&:10) (0\&:09) 0\&.62 (0\&:03) 3\&.34 (0\&:09) 8\&.38 (0\&:11) 83\&.44 (1\&:33) 1\&.47 (0\&:05) 625.0 454.5
+
+SUN3+ (0\&:04) (0\&:19) 5\&.28 (0\&:11) (0\&:06) 0\&.63 (0\&:03) 0\&.86 (0\&:06) 5\&.1 (0\&:08) 31\&.1 (0\&:36) 1\&.5 (0\&:04) 714.3 454.5
+
+U750 (0\&:17) (0\&:39) 22\&.79 (0\&:40) (0\&:29) 3\&.31 (0\&:10) 4\&.28 (0\&:17) 10\&.98 (0\&:15) 114\&.41 (2\&:13) 10\&.19 (0\&:17) 208.3 208.3
+
+V750 (0\&:27) (4\&:17) 46\&.54 (1\&:11) (0\&:25) 5\&.90 (0\&:11) 6\&.48 (0\&:14) 10\&.65 (0\&:14) 69\&.62 (1\&:33) 14\&.85 (0\&:20) 238.1 384.6
+
+UMVX (0\&:09) (0\&:37) 15\&.5 (0\&:38) (0\&:20) 2\&.06 (0\&:09) 2\&.98 (0\&:17) 10\&.98 (0\&:16) 95\&.61 (1\&:49) 4\&.93 (0\&:16) 172.4 208.3
+
+VMVX n/a n/a n/a n/a (0\&:17) 3\&.44 (0\&:11) 4\&.31 (0\&:15) 9\&.32 (0\&:12) 74\&.72 (1\&:26) 10\&.83 (0\&:35) 192.3 294.1
+
+VMVXM (0\&:17) (2\&:16) 27\&.58 (0\&:39) (0\&:16) 3\&.51 (0\&:07) 4\&.31 (0\&:10) 9\&.31 (0\&:11) 74\&.54 (1\&:21) 10\&.81 (0\&:27) 312.5 500.0
+
+V780 n/a n/a n/a n/a (0\&:16) 3\&.38 (0\&:08) 4\&.00 (0\&:11) 6\&.88 (0\&:08) 45\&.47 (0\&:53) 7\&.71 (0\&:12) 227.3 416.7
+
+V780S (0\&:15) (2\&:09) 26\&.10 (0\&:31) (0\&:19) 3\&.57 (0\&:10) 4\&.22 (0\&:17) 6\&.78 (0\&:10) 45\&.11 (0\&:57) 7\&.83 (0\&:14) 166.7 263.2
+
+V8600 (0\&:08) (1\&:05) 8\&.59 (0\&:17) (0\&:09) 1\&.56 (0\&:05) 1\&.28 (0\&:07) 2\&.09 (0\&:04) 13\&.54 (0\&:32) 2\&.58 (0\&:06) 294.1 625.0
+
+MV10 (0\&:14) (0\&:29) 6\&.4 (0\&:25) (0\&:09) 1\&.5 (0\&:06) 1\&.6 (0\&:08) 4\&.8 (0\&:07) 39\&.3 (0\&:47) 2\&.9 (0\&:06) 89.3 200.0
+
+MV8 (0\&:28) (2\&:17) 13.13 (0\&:57) (0\&:16) 2\&.85 (0\&:12) 3\&.07 (0\&:17) 9\&.87 (0\&:13) 77\&.68 (1\&:31) 5\&.69 (0\&:12) 31\&.1 200\&.0
+.TE
+.sp
+.LP
+\fBKEY:\fR
+.TS
+lB lw(8.0i).
+ISI T{ +Integrated Solutions with 16-Mhz 68020 and 16-Mhz 68881 fp_coprocessor; UNIX +4.2BSD; 8Mb memory; Greenhills compiler +T} +SUN3 T{ +SUN 3/160C with 68881 fp_chip; SUN UNIX 3.3; 8Mb memory; Eagle +disk with 380Mb +T} +SUN3+ T{ +SUN 3/180C with 68881 fp_chip + FPA; SUN UNIX 3.2; 8Mb memory; 380Mb Eagle disk +T} +U750 VAX 11/750+FPA; UNIX 4.3BSD; 8Mb memory; RA81 disk +V750 VAX 11/750+FPA; VMS V4.5; 7.25 Mb memory; RA81/clustered disks +UMVX VAXSTATION II/GPX; ULTRIX 1.2; 5Mb memory; 150 Mb RD54 disk +VMVXM T{ +VAXSTATION II/GPX; MICROVMS V4.5; 5Mb memory; IRAF installed on 300MB +MAXSTOR disk, data files on this disk also +T} +VMVX T{ +VAXSTATION II/GPX; MICROVMS V4.5; 5Mb memory; IRAF on 300MB +MAXSTOR disk, data on 70Mb RD53 (84% full) +T} +V780 T{ +VAX 11/780+FPA; VMS V4.5; 16Mb memory; IRAF installed on an RA81, data on an +RM03 disk with 23 free Mb, Massbus +T} +V780S T{ +VAX 11/780+FPA; VMS V4.5; 16Mb memory; IRAF and data on an RA81 disk, Unibus +T} +V8600 VAX 8600; VMS V4.5; 28Mb memory; RA81/clustered disks +MV10 T{ +MV 10000; AOSVS 7.54; 24Mb memory; 2-600 Mb ARGUS and 2-600 Mb KISMET disks +T} +MV8 T{ +MV 8000 at La Serena; 5Mb memory, 2 large DG disks, 2 small Winchesters, +disks nearly full and badly fragmented +T} +.TE diff --git a/pkg/bench/fortask.cl b/pkg/bench/fortask.cl new file mode 100644 index 00000000..586386e5 --- /dev/null +++ b/pkg/bench/fortask.cl @@ -0,0 +1,15 @@ +# FORTASK -- Execute a foreign task repeatedly. + +procedure fortask (nreps) + +int nreps { prompt = "number of repetitions" } +int i + +begin + time; print ("======= begin ========") + + for (i=nreps; i > 0; i-=1) + !rmbin + + print ("======= end ========"); time +end diff --git a/pkg/bench/mkpkg b/pkg/bench/mkpkg new file mode 100644 index 00000000..d0ada370 --- /dev/null +++ b/pkg/bench/mkpkg @@ -0,0 +1,5 @@ +# Make the bench package. + +$omake x_bench.x +$link x_bench.o +$exit diff --git a/pkg/bench/plots.cl b/pkg/bench/plots.cl new file mode 100644 index 00000000..dc92ae4b --- /dev/null +++ b/pkg/bench/plots.cl @@ -0,0 +1,20 @@ +# PLOTS -- Measure the time required to make a number of row plots of an image. + +procedure plots (image, nlines) + +string image { prompt = "image to be plotted" } +int nlines { prompt = "number of line plots to be made" } + +string imname +int nleft + +begin + cache ("prow") + imname = image + time(); print ("======== start ========") + + for (nleft=nlines; nleft > 0; nleft-=1) + $prow (imname, 50, >G "dev$null") + + print ("======== end ========"); time() +end diff --git a/pkg/bench/subproc.cl b/pkg/bench/subproc.cl new file mode 100644 index 00000000..d1371484 --- /dev/null +++ b/pkg/bench/subproc.cl @@ -0,0 +1,18 @@ +# SUBPROC -- Benchmark the process control facilities. + +procedure subproc (nreps) + +int nreps { prompt = "number of repetitions" } +int i + +begin + time; print ("======= begin ========") + + for (i=nreps; i > 0; i-=1) { + prcache ("imheader") + flprcache ("imheader") + time() + } + + print ("======= end ========"); time +end diff --git a/pkg/bench/x_bench.x b/pkg/bench/x_bench.x new file mode 100644 index 00000000..f6d6e3df --- /dev/null +++ b/pkg/bench/x_bench.x @@ -0,0 +1,229 @@ +# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc. + +include <time.h> +include <mach.h> +include <fset.h> +include <knet.h> + +# BENCH -- IRAF benchmark tasks. 
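+# All of the compiled benchmark tasks are linked into the single executable
+# x_bench.e (see the task statement below), hence a single process spawn
+# suffices to run any of the tasks once the process is in the process cache.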
+ +task ptime = t_ptime, + getpar = t_getpar, + wipc = t_wipc, + rbin = t_rbin, + wbin = t_wbin, + rrbin = t_rrbin, + rtext = t_rtext, + wtext = t_wtext + +define SZ_RBBUF 16384 +define SZ_BBUF 4096 +define SZ_TBUF 64 + + +# PTIME -- Print the current clock time. This is essentially a no-op task, +# used to test process connect/disconnect, IPC, and task startup/shutdown +# overhead. + +procedure t_ptime() + +char tbuf[SZ_TIME] +long clktime() + +begin + call cnvtime (clktime (long(0)), tbuf, SZ_TIME) + call printf ("%s\n") + call pargstr (tbuf) +end + + +# GETPAR -- Get a parameter from the CL repeatedly. Used to test the IPC +# turnaround time. + +procedure t_getpar() + +int niter, i +char paramval[SZ_FNAME] +int clgeti() + +begin + niter = clgeti ("niter") + do i = 1, niter + call clgstr ("cl.version", paramval, SZ_FNAME) +end + + +# WIPC -- Write to IPC (tests IPC bandwidth). + +procedure t_wipc() + +int fd, i +char bbuf[SZ_BBUF] +long n, filesize, clgetl() + +begin + fd = STDOUT + filesize = clgetl ("filesize") / SZB_CHAR + + do i = 1, SZ_BBUF + bbuf[i] = mod (i-1, 128) + 1 + + for (n=0; n < filesize; n = n + SZ_BBUF) + call write (fd, bbuf, SZ_BBUF) + + call eprintf ("wrote %d bytes\n") + call pargl (n * SZB_CHAR) +end + + +# RBIN -- Read from a binary file. + +procedure t_rbin() + +long totchars +char fname[SZ_FNAME] +char bbuf[SZ_BBUF] +int fd, open(), read() + +begin + call clgstr ("fname", fname, SZ_FNAME) + fd = open (fname, READ_ONLY, BINARY_FILE) + call fseti (fd, F_ADVICE, SEQUENTIAL) + totchars = 0 + + while (read (fd, bbuf, SZ_BBUF) == SZ_BBUF) + totchars = totchars + SZ_BBUF + + call close (fd) + call printf ("read %d bytes\n") + call pargl (totchars * SZB_CHAR) +end + + +# WBIN -- Write to a binary file. + +procedure t_wbin() + +char fname[SZ_FNAME] +char bbuf[SZ_BBUF] +int fd, i, open() +long n, filesize, clgetl() + +begin + call clgstr ("fname", fname, SZ_FNAME) + iferr (call delete (fname)) + ; + fd = open (fname, APPEND, BINARY_FILE) + call fseti (fd, F_ADVICE, SEQUENTIAL) + filesize = clgetl ("filesize") / SZB_CHAR + + do i = 1, SZ_BBUF + bbuf[i] = mod (i-1, 128) + 1 + + for (n=0; n < filesize; n = n + SZ_BBUF) + call write (fd, bbuf, SZ_BBUF) + + call close (fd) + call printf ("wrote %d bytes\n") + call pargl (n * SZB_CHAR) +end + + +# RTEXT -- Read from a text file. + +procedure t_rtext() + +long totchars +char fname[SZ_FNAME] +char tbuf[SZ_TBUF] +int fd, nchars, nlines +int open(), getline() + +begin + call clgstr ("fname", fname, SZ_FNAME) + fd = open (fname, READ_ONLY, TEXT_FILE) + totchars = 0 + nlines = 0 + + repeat { + nchars = getline (fd, tbuf) + if (nchars > 0) { + totchars = totchars + nchars + nlines = nlines + 1 + } + } until (nchars == EOF) + + call close (fd) + call printf ("read %d chars, %d lines\n") + call pargl (totchars) + call pargi (nlines) +end + + +# WTEXT -- Write to a text file. + +procedure t_wtext() + +char fname[SZ_FNAME] +char tbuf[SZ_TBUF] +int fd, op, open() +long n, nlines, filesize, clgetl() + +begin + call clgstr ("fname", fname, SZ_FNAME) + iferr (call delete (fname)) + ; + fd = open (fname, APPEND, TEXT_FILE) + filesize = clgetl ("filesize") + nlines = 0 + + for (op=1; op < SZ_TBUF; op=op+1) + tbuf[op] = '.' + + tbuf[op] = '\n' + op = op + 1 + tbuf[op] = EOS + + for (n=0; n < filesize; n = n + SZ_TBUF) { + call putline (fd, tbuf) + nlines = nlines + 1 + } + + call close (fd) + call printf ("wrote %d chars, %d lines\n") + call pargl (n) + call pargi (nlines) +end + + +# RRBIN -- Raw (unbuffered) read from a binary file. 
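+# Each pass through the main loop posts an asynchronous read with zardbf and
+# then immediately waits for it to complete with zawtbf, which returns the
+# actual transfer count in bytes (or ERR) in "status".  The point is to
+# bypass FIO buffering entirely and measure the raw bandwidth of the host
+# binary file driver.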
+
+procedure t_rrbin()
+
+char	fname[SZ_FNAME]
+char	bbuf[SZ_RBBUF]
+long	totchars, offset, buflen
+int	fd, chan, status
+int	open(), fstati()
+
+begin
+	call clgstr ("fname", fname, SZ_FNAME)
+	fd = open (fname, READ_ONLY, BINARY_FILE)
+	chan = fstati (fd, F_CHANNEL)
+
+	buflen = SZ_RBBUF * SZB_CHAR
+	totchars = 0
+	offset = 1
+	status = 0
+
+	repeat {
+	    totchars = totchars + (status / SZB_CHAR)
+	    call zardbf (chan, bbuf, buflen, offset)
+	    offset = offset + buflen
+	    call zawtbf (chan, status)
+	} until (status <= 0)
+
+	call close (fd)
+	call printf ("read %d bytes\n")
+	call pargl (totchars * SZB_CHAR)
+end
diff --git a/pkg/bench/xctest/README b/pkg/bench/xctest/README
new file mode 100644
index 00000000..724ec929
--- /dev/null
+++ b/pkg/bench/xctest/README
@@ -0,0 +1,2 @@
+This directory is an example of a small IRAF package, used to benchmark the
+time required to compile and link a small package.
diff --git a/pkg/bench/xctest/columns.x b/pkg/bench/xctest/columns.x
new file mode 100644
index 00000000..ee52abc5
--- /dev/null
+++ b/pkg/bench/xctest/columns.x
@@ -0,0 +1,74 @@
+# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
+
+include <ctype.h>
+include <chars.h>
+include <error.h>
+
+define	MAX_FILES	12
+
+.help columns
+.nf___________________________________________________________________
+COLUMNS -- convert a multicolumn file into a multifile column.
+	A set of files `sdastemp.n' is produced, with each column
+	in a separate file.
+
+usage:	COLUMNS number_of_columns File_name
+.endhelp______________________________________________________________
+
+
+# COLUMNS.X -- SDAS support utility
+#
+# This routine allows SDAS to treat multicolumn tables
+# as simple CL lists.  Each column in the table is referenced in
+# SDAS by a different parameter, pointing in the .par file to
+# a different list.  This routine is a preprocessor which takes
+# a multicolumn file and generates a multifile column.
+#
+# To allow for column headers in the multicolumn file,
+# any line which begins with a `#' will be ignored.
+# All data is transferred as text.
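+#
+# Note that all of the output files are open simultaneously, hence
+# "numcols" must not exceed MAX_FILES; no check is made here.
+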
+
+procedure t_columns()
+
+char fname[SZ_FNAME], outfile[SZ_FNAME], outroot[SZ_FNAME]
+char line[SZ_LINE], word[SZ_LINE], filenum[SZ_FNAME]
+int numcols, infile
+int outnum[MAX_FILES]
+int nchar, nfile, ip
+int clgeti(), open(), getline(), itoc(), ctowrd()
+errchk open, getline
+
+begin
+	# Get the number of columns and the input file name.
+	call clgstr ("filename", fname, SZ_FNAME)
+	numcols = clgeti ("numcols")
+	call clgstr ("outroot", outroot, SZ_FNAME)
+
+	# Open all the files.
+	infile = open (fname, READ_ONLY, TEXT_FILE)
+	for (nfile=1;  nfile <= numcols;  nfile=nfile+1) {
+	    nchar = itoc (nfile, filenum, 2)
+	    call strcpy (outroot, outfile, SZ_FNAME)
+	    call strcat (filenum, outfile, SZ_FNAME)
+	    outnum[nfile] = open (outfile, NEW_FILE, TEXT_FILE)
+	}
+
+	# Separate each line of the input file.
+	while (getline (infile, line) != EOF) {
+	    if ((line[1] != '#') && (line[1] != '\n')) {
+		ip = 1
+		for (nfile=1;  nfile <= numcols;  nfile=nfile+1) {
+		    nchar = ctowrd (line, ip, word, SZ_LINE)
+		    call strcat ("\n", word, SZ_LINE)
+		    call putline (outnum[nfile], word)
+		}
+	    }
+	}
+
+	# Close the files.
+	call close (infile)
+	for (nfile=1;  nfile <= numcols;  nfile=nfile+1) {
+	    call close (outnum[nfile])
+	}
+end
diff --git a/pkg/bench/xctest/lintran.x b/pkg/bench/xctest/lintran.x
new file mode 100644
index 00000000..fe0ffdbc
--- /dev/null
+++ b/pkg/bench/xctest/lintran.x
@@ -0,0 +1,370 @@
+# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
+
+include <pattern.h>
+include <ctype.h>
+
+define MAX_FIELDS 100 # Maximum number of fields in list
+define TABSIZE 8 # Spacing of tab stops
+define LEN_TR 9 # Length of structure TR
+
+# The TR transformation descriptor structure.
+
+define X1 Memr[P2R($1)] # Input origin
+define Y1 Memr[P2R($1+1)]
+define XSCALE Memr[P2R($1+2)] # Scale factors
+define YSCALE Memr[P2R($1+3)]
+define THETA Memr[P2R($1+4)] # Rotation angle
+define X2 Memr[P2R($1+5)] # Output origin
+define Y2 Memr[P2R($1+6)]
+define COS_THETA Memr[P2R($1+7)]
+define SIN_THETA Memr[P2R($1+8)]
+
+
+# LINTRAN -- Performs a linear transformation on each element of the
+# input list, producing a transformed list as output.
+
+procedure t_lintran()
+
+char in_fname[SZ_FNAME]
+int list
+pointer sp, tr
+int xfield, yfield, min_sigdigits
+
+int clgeti(), clpopni(), clgfil()
+
+begin
+	# Allocate memory for the transformation parameters structure.
+	call smark (sp)
+	call salloc (tr, LEN_TR, TY_STRUCT)
+
+	# Call procedure to get parameters and fill the structure.
+	call lt_initialize_transform (tr)
+
+	# Get the field numbers from the CL.
+	xfield = clgeti ("xfield")
+	yfield = clgeti ("yfield")
+	min_sigdigits = clgeti ("min_sigdigits")
+
+	# Open the template of input files.
+	list = clpopni ("files")
+
+	# While the input list is not depleted, open the file and transform
+	# the list.
+	while (clgfil (list, in_fname, SZ_FNAME) != EOF)
+	    call lt_transform_file (in_fname, xfield, yfield, min_sigdigits, tr)
+
+	# Close the template.
+	call clpcls (list)
+	call sfree (sp)
+end
+
+
+# LT_INITIALIZE_TRANSFORM -- Get the parameter values relevant to the
+# transformation from the CL.  List entries will be transformed in
+# procedure lt_transform: the input origin is subtracted, then the
+# coordinates are scaled, rotated, and shifted to the output origin.
+
+procedure lt_initialize_transform (tr)
+
+pointer tr
+
+bool clgetb()
+real clgetr()
+
+begin
+	# Get parameters from the CL.
+	X1(tr) = clgetr ("x1")			# (x1,y1) = current origin
+	Y1(tr) = clgetr ("y1")
+	XSCALE(tr) = clgetr ("xscale")
+	YSCALE(tr) = clgetr ("yscale")
+	THETA(tr) = clgetr ("angle")
+	if (! clgetb ("radians"))
+	    THETA(tr) = THETA(tr) / 57.29577951	# degrees per radian
+	X2(tr) = clgetr ("x2")			# (x2,y2) = new origin
+	Y2(tr) = clgetr ("y2")
+
+	# The following terms are constant for a given transformation.
+	# They are calculated once and saved in the structure.
+
+	COS_THETA(tr) = cos (THETA(tr))
+	SIN_THETA(tr) = sin (THETA(tr))
+end
+
+
+# LT_TRANSFORM_FILE -- This procedure is called once for each file
+# in the input list.  Each line of the input file that is not blank
+# or a comment is transformed; blank and comment lines are output
+# unaltered.
+
+procedure lt_transform_file (in_fname, xfield, yfield, min_sigdigits, tr)
+
+char in_fname[ARB]
+int xfield, yfield
+pointer tr
+
+char outbuf[SZ_LINE]
+int nfields, nchars, max_fields, in, nline
+int nsdig_x, nsdig_y, offset, min_sigdigits
+pointer sp, field_pos, linebuf, inbuf, ip
+double x, y, xt, yt
+int getline(), lt_get_num(), open()
+
+begin
+	call smark (sp)
+	call salloc (inbuf, SZ_LINE, TY_CHAR)
+	call salloc (linebuf, SZ_LINE, TY_CHAR)
+	call salloc (field_pos, MAX_FIELDS, TY_INT)
+
+	max_fields = MAX_FIELDS
+
+	# Open the input file.
+	in = open (in_fname, READ_ONLY, TEXT_FILE)
+
+	for (nline=1;  getline (in, Memc[inbuf]) != EOF;  nline = nline + 1) {
+	    for (ip=inbuf;  IS_WHITE(Memc[ip]);  ip=ip+1)
+		;
+	    if (Memc[ip] == '#') {
+		# Pass comment lines on to the output unchanged.
+		call putline (STDOUT, Memc[inbuf])
+		next
+	    } else if (Memc[ip] == '\n' || Memc[ip] == EOS) {
+		# Blank lines too.
+		call putline (STDOUT, Memc[inbuf])
+		next
+	    }
+
+	    # Expand tabs into blanks, determine field offsets.
+	    call strdetab (Memc[inbuf], Memc[linebuf], SZ_LINE, TABSIZE)
+	    call lt_find_fields (Memc[linebuf], Memi[field_pos],
+		max_fields, nfields)
+
+	    if (xfield > nfields || yfield > nfields) {
+		call eprintf ("Not enough fields in file '%s', line %d\n")
+		    call pargstr (in_fname)
+		    call pargi (nline)
+		call putline (STDOUT, Memc[linebuf])
+		next
+	    }
+
+	    offset = Memi[field_pos + xfield-1]
+	    nchars = lt_get_num (Memc[linebuf+offset-1], x, nsdig_x)
+	    if (nchars == 0) {
+		call eprintf ("Bad x value in file '%s' at line %d:\n")
+		    call pargstr (in_fname)
+		    call pargi (nline)
+		call putline (STDOUT, Memc[linebuf])
+		next
+	    }
+
+	    offset = Memi[field_pos + yfield-1]
+	    nchars = lt_get_num (Memc[linebuf+offset-1], y, nsdig_y)
+	    if (nchars == 0) {
+		call eprintf ("Bad y value in file '%s' at line %d:\n")
+		    call pargstr (in_fname)
+		    call pargi (nline)
+		call putline (STDOUT, Memc[linebuf])
+		next
+	    }
+
+	    call lt_transform (x, y, xt, yt, tr)
+
+	    call lt_pack_line (Memc[linebuf], outbuf, SZ_LINE, Memi[field_pos],
+		nfields, xfield, yfield, xt, yt, nsdig_x, nsdig_y,
+		min_sigdigits)
+
+	    call putline (STDOUT, outbuf)
+	}
+
+	call sfree (sp)
+	call close (in)
+end
+
+
+# LT_FIND_FIELDS -- This procedure finds the starting column for each field
+# in the input line.  These column numbers are returned in the array
+# field_pos; the number of fields is also returned.
+
+procedure lt_find_fields (linebuf, field_pos, max_fields, nfields)
+
+char linebuf[SZ_LINE]
+int field_pos[max_fields], max_fields, nfields
+bool in_field
+int ip, field_num
+
+begin
+	field_num = 1
+	field_pos[1] = 1
+	in_field = false
+
+	for (ip=1;  linebuf[ip] != '\n' && linebuf[ip] != EOS;  ip=ip+1) {
+	    if (! IS_WHITE(linebuf[ip]))
+		in_field = true
+	    else if (in_field) {
+		in_field = false
+		field_num = field_num + 1
+		field_pos[field_num] = ip
+	    }
+	}
+
+	field_pos[field_num+1] = ip
+	nfields = field_num
+end
+
+
+# LT_GET_NUM -- The field entry is converted from character to double
+# in preparation for the transformation.  The number of significant
+# digits is counted and returned as an argument; the number of chars in
+# the number is returned as the function value.
+
+int procedure lt_get_num (linebuf, dval, nsdig)
+
+char linebuf[SZ_LINE]
+int nsdig
+double dval
+char ch
+int nchar, ip
+
+int gctod()
+
+begin
+	ip = 1
+	nsdig = 0
+	nchar = gctod (linebuf, ip, dval)
+	if (nchar == 0 || IS_INDEFD (dval))
+	    return (nchar)
+
+	# Skip leading white space.
+	ip = 1
+	repeat {
+	    ch = linebuf[ip]
+	    if (! IS_WHITE(ch))
+		break
+	    ip = ip + 1
+	}
+
+	# Count significant digits.
+	for (;  ! IS_WHITE(ch) && ch != '\n' && ch != EOS;  ch=linebuf[ip]) {
+	    if (IS_DIGIT (ch))
+		nsdig = nsdig + 1
+	    ip = ip + 1
+	}
+
+	return (nchar)
+end
+
+
+# LT_TRANSFORM -- The linear transformation is performed in this procedure.
+# First the coordinates are scaled, then rotated and translated.  The
+# transformed coordinates are returned.
+
+procedure lt_transform (x, y, xt, yt, tr)
+
+double x, y, xt, yt
+pointer tr
+double xtemp, ytemp
+
+begin
+	# Subtract off the current origin.
+	if (IS_INDEFD (x))
+	    xt = INDEFD
+	else
+	    xt = x - X1(tr)
+
+	if (IS_INDEFD (y))
+	    yt = INDEFD
+	else
+	    yt = y - Y1(tr)
+
+	# Scale and rotate the coordinates.
+	if (THETA(tr) == 0) {
+	    if (!IS_INDEFD (xt))
+		xt = xt * XSCALE(tr) + X2(tr)
+	    if (!IS_INDEFD (yt))
+		yt = yt * YSCALE(tr) + Y2(tr)
+	    return
+
+	} else if (IS_INDEFD(xt) || IS_INDEFD(yt)) {
+	    # A nonzero angle with either coordinate indefinite results in
+	    # both transformed coordinates being INDEFD.
+	    xt = INDEFD
+	    yt = INDEFD
+	    return
+	}
+
+	# Rotation for a nonzero angle with both coordinates defined.
+	xtemp = xt * XSCALE(tr)
+	ytemp = yt * YSCALE(tr)
+
+	xt = xtemp * COS_THETA(tr) - ytemp * SIN_THETA(tr)
+	yt = xtemp * SIN_THETA(tr) + ytemp * COS_THETA(tr)
+
+	# Now shift the rotated coordinates.
+	xt = xt + X2(tr)
+	yt = yt + Y2(tr)
+end
+
+
+# LT_PACK_LINE -- Fields are packed into the outbuf buffer.  Transformed
+# fields are converted to strings; other fields are copied from
+# the input line to the output buffer.
+
+procedure lt_pack_line (inbuf, outbuf, maxch, field_pos, nfields,
+	xfield, yfield, xt, yt, nsdig_x, nsdig_y, min_sigdigits)
+
+char inbuf[ARB], outbuf[maxch]
+int maxch, field_pos[ARB], nfields, xfield, yfield, nsdig_x, nsdig_y
+int min_sigdigits
+double xt, yt
+
+char field[SZ_LINE]
+int num_field, width, op
+
+int gstrcpy()
+
+begin
+	# Initialize the output pointer.
+	op = 1
+
+	do num_field = 1, nfields {
+	    width = field_pos[num_field + 1] - field_pos[num_field]
+
+	    if (num_field == xfield) {
+		call lt_format_field (xt, field, maxch, nsdig_x, width,
+		    min_sigdigits)
+	    } else if (num_field == yfield) {
+		call lt_format_field (yt, field, maxch, nsdig_y, width,
+		    min_sigdigits)
+	    } else {
+		# Put "width" characters from inbuf into field.
+		call strcpy (inbuf[field_pos[num_field]], field, width)
+	    }
+
+	    # Fields must be delimited by at least one blank.
+	    if (num_field > 1 && !IS_WHITE (field[1])) {
+		outbuf[op] = ' '
+		op = op + 1
+	    }
+
+	    # Copy "field" to the output buffer.
+	    op = op + gstrcpy (field, outbuf[op], maxch)
+	}
+
+	outbuf[op] = '\n'
+	outbuf[op+1] = EOS
+end
+
+
+# LT_FORMAT_FIELD -- A transformed coordinate is written into a string
+# buffer.  The output field is of (at least) the same width and significance
+# as the input list entry.
+
+procedure lt_format_field (dval, wordbuf, maxch, nsdig, width, min_sigdigits)
+
+char wordbuf[maxch]
+int width, nsdig, maxch, min_sigdigits
+double dval
+
+begin
+	call sprintf (wordbuf, maxch, "%*.*g")
+	    call pargi (width)
+	    call pargi (max (min_sigdigits, nsdig))
+	    call pargd (dval)
+end
diff --git a/pkg/bench/xctest/mkpkg b/pkg/bench/xctest/mkpkg
new file mode 100644
index 00000000..87b4c792
--- /dev/null
+++ b/pkg/bench/xctest/mkpkg
@@ -0,0 +1,25 @@
+# Make the LISTS package.
+
+$call relink
+$exit
+
+relink:
+	$set LIBS = "-lxtools"
+
+	$update libpkg.a
+	$omake x_lists.x
+	$link x_lists.o libpkg.a $(LIBS)
+	;
+
+clean:
+	$delete libpkg.a x_lists.o x_lists.e
+	;
+
+libpkg.a:
+	table.x <ctype.h>
+	words.x
+	tokens.x <ctotok.h>
+	unique.x
+	lintran.x <pattern.h> <ctype.h>
+	columns.x <ctype.h> <chars.h> <error.h>
+	;
diff --git a/pkg/bench/xctest/table.x b/pkg/bench/xctest/table.x
new file mode 100644
index 00000000..75e0a3e3
--- /dev/null
+++ b/pkg/bench/xctest/table.x
@@ -0,0 +1,111 @@
+# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
+
+include <ctype.h>
+
+# Read a list of strings from the standard input or a list of files and
+# assemble them into a nicely formatted table.  If reading from multiple
+# input files, make a separate table for each.  There is no fixed limit
+# on the size of the table which can be formatted.  The table is not
+# sorted; this should be done as a separate operation if desired.
+
+define INIT_STRBUF 512
+define STRBUF_INCREMENT 1024
+define INIT_MAXSTR 64
+define MAXSTR_INCREMENT 128
+
+
+procedure t_table()
+
+int list, first_col, last_col, ncols, maxstrlen
+int fd, nextch, nstrings, maxch, sz_strbuf, max_strings, ip
+pointer sp, strbuf, fname, stroff
+int strlen(), fscan(), nscan(), clpopni()
+int clgfil(), open(), envgeti(), clplen(), clgeti()
+
+begin
+	# Allocate buffers.  The string buffer "strbuf" and the associated
+	# list of offsets "stroff" will be reallocated later if they fill up.
+	call smark (sp)
+	call salloc (fname, SZ_FNAME, TY_CHAR)
+
+	call malloc (strbuf, INIT_STRBUF, TY_CHAR)
+	call malloc (stroff, INIT_MAXSTR, TY_INT)
+
+	# Get the various table formatting parameters from the CL.
+	ncols = clgeti ("ncols")
+	first_col = clgeti ("first_col")
+	last_col = clgeti ("last_col")
+
+	# If the user did not specify a valid "last_col", attempt to read the
+	# terminal x-dimension from the environment.  There is no good reason
+	# to abort if the environment variable cannot be found.
+	if (last_col == 0)
+	    iferr (last_col = envgeti ("ttyncols"))
+		last_col = 80
+
+	# Set the maximum string length to the size of an output line if no
+	# maximum length was given.
+	maxstrlen = clgeti ("maxstrlen")
+	if (maxstrlen == 0)
+	    maxch = last_col - first_col + 1
+	else
+	    maxch = min (maxstrlen, last_col - first_col + 1)
+
+	max_strings = INIT_MAXSTR
+	sz_strbuf = INIT_STRBUF
+
+	# Read the contents of each file into a big string buffer.  Print a
+	# separate table for each file.
+
+	list = clpopni ("input_files")
+
+	while (clgfil (list, Memc[fname], SZ_FNAME) != EOF) {
+	    fd = open (Memc[fname], READ_ONLY, TEXT_FILE)
+	    nextch = 1
+	    nstrings = 0
+
+	    # If printing several tables, label each with the name of the file.
+	    if (clplen (list) > 1) {
+		call printf ("\n==> %s <==\n")
+		    call pargstr (Memc[fname])
+	    }
+
+	    while (fscan (fd) != EOF) {
+		call gargstr (Memc[strbuf+nextch-1], maxch)
+		# Ignore blank lines and faulty scans.
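+		# (nscan() returns the number of garg* conversions completed
+		# since the last fscan(); zero here means gargstr() obtained
+		# nothing usable from the line.)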
+		if (nscan() == 0)
+		    next
+		for (ip=strbuf+nextch-1;  IS_WHITE (Memc[ip]);  ip=ip+1)
+		    ;
+		if (Memc[ip] == '\n' || Memc[ip] == EOS)
+		    next
+
+		# Save the one-indexed offset of this string for strtbl.
+		Memi[stroff+nstrings] = nextch
+		nextch = nextch + strlen (Memc[strbuf+nextch-1]) + 1
+
+		# Check the string buffer; make it bigger if necessary.
+		if (nextch + maxch >= sz_strbuf) {
+		    sz_strbuf = sz_strbuf + STRBUF_INCREMENT
+		    call realloc (strbuf, sz_strbuf, TY_CHAR)
+		}
+
+		# Add space for more string offsets if there are too many
+		# strings.  The offset buffer must be grown as soon as it
+		# fills, before the next offset can overflow it.
+		nstrings = nstrings + 1
+		if (nstrings >= max_strings) {
+		    max_strings = max_strings + MAXSTR_INCREMENT
+		    call realloc (stroff, max_strings, TY_INT)
+		}
+	    }
+	    call close (fd)
+
+	    # Print the table on the standard output.
+	    call strtbl (STDOUT, Memc[strbuf], Memi[stroff], nstrings,
+		first_col, last_col, maxch, ncols)
+	}
+
+	call clpcls (list)
+	call mfree (strbuf, TY_CHAR)
+	call mfree (stroff, TY_INT)
+	call sfree (sp)
+end
diff --git a/pkg/bench/xctest/tokens.x b/pkg/bench/xctest/tokens.x
new file mode 100644
index 00000000..c8793748
--- /dev/null
+++ b/pkg/bench/xctest/tokens.x
@@ -0,0 +1,140 @@
+# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
+
+include <ctotok.h>
+
+.help tokens
+.nf ___________________________________________________________________________
+TOKENS -- Break the input up into a series of tokens.  The makeup of the
+various tokens is defined by the FMTIO primitive ctotok, which is not very
+sophisticated, and does not claim to recognize the tokens of any particular
+language (though it does reasonably well for most modern languages).  Comments
+can be deleted if desired, and newlines may be passed on to the output as
+tokens.
+
+Comments are delimited by user specified strings.  Only strings which are also
+recognized by ctotok() as legal tokens may be used as comment delimiters.
+If newline marks the end of a comment, the end_comment string should be given
+as "eol".  Examples of acceptable comment conventions are ("#", eol),
+("/*", "*/"), ("{", "}"), and ("!", eol).  Fortran style comments ("^{c}",eol)
+can be stripped by filtering with match beforehand.
+
+Each token is passed to the output on a separate line.  Multiple newline
+tokens are compressed to a single token (a blank line).  If newline is not
+desired as an output token, it is considered whitespace and serves only to
+delimit tokens.
+.endhelp ______________________________________________________________________
+
+define SZ_COMDELIMSTR 20 # Comment delimiter string.
+
+procedure t_tokens()
+
+bool ignore_comments, comment_delimiter_is_eol
+bool in_comment, pass_newlines
+char begin_comment[SZ_COMDELIMSTR], end_comment[SZ_COMDELIMSTR]
+int fd, list, token, last_token, last_nscan
+pointer sp, fname, tokbuf, outstr, ip, op
+
+bool streq(), clgetb()
+int clpopni(), clgfil(), fscan(), nscan(), open(), ctocc()
+
+begin
+	call smark (sp)
+	call salloc (fname, SZ_FNAME, TY_CHAR)
+	call salloc (tokbuf, SZ_LINE, TY_CHAR)
+	call salloc (outstr, SZ_LINE, TY_CHAR)
+
+	# If comments are to be ignored, get the comment delimiters.
+	ignore_comments = clgetb ("ignore_comments")
+	if (ignore_comments) {
+	    call clgstr ("begin_comment", begin_comment, SZ_COMDELIMSTR)
+	    call clgstr ("end_comment", end_comment, SZ_COMDELIMSTR)
+	    comment_delimiter_is_eol = streq (end_comment, "eol")
+	} else {
+	    # Set begin_comment to the null string to ensure that we never
+	    # enter skip comment mode.  This requires that we check for the
+	    # EOS token before the begin_comment token below.
+	    begin_comment[1] = EOS
+	}
+
+	# Start outside of any comment.
+	in_comment = false
+
+	# Is newline a token?
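+	# (If not, newlines are treated as whitespace and serve only to
+	# delimit tokens; see the help text above.)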
+	pass_newlines = clgetb ("newlines")
+
+	# Merge all input files into a single stream of tokens on the
+	# standard output.
+	list = clpopni ("files")
+
+	while (clgfil (list, Memc[fname], SZ_FNAME) != EOF) {
+	    fd = open (Memc[fname], READ_ONLY, TEXT_FILE)
+	    last_token = NULL
+
+	    while (fscan (fd) != EOF) {
+		# Break the input line into a stream of tokens.
+		repeat {
+		    last_nscan = nscan()
+		    call gargtok (token, Memc[tokbuf], SZ_LINE)
+
+		    # If nscan() did not increment, the line has been
+		    # exhausted.  (This cannot actually happen with gargtok,
+		    # which returns a TOK_EOS token at the end of the line,
+		    # but the test is cheap insurance against a loop.)
+		    if (nscan() == last_nscan)
+			break
+
+		    # If busy ignoring a comment, check for the delimiter.
+		    if (in_comment) {
+			if (comment_delimiter_is_eol &&
+			    (token == TOK_NEWLINE || token == TOK_EOS)) {
+			    in_comment = false
+			    if (pass_newlines && last_token != TOK_NEWLINE) {
+				call printf ("\n")
+				last_token = TOK_NEWLINE
+			    }
+			    break
+			} else if (streq (Memc[tokbuf], end_comment)) {
+			    in_comment = false
+			    next
+			} else
+			    next
+		    }
+
+		    # If we get here, we are not processing a comment.
+
+		    if (token == TOK_NEWLINE) {
+			if (pass_newlines && last_token != TOK_NEWLINE)
+			    call printf ("\n")
+			last_token = TOK_NEWLINE
+			break
+
+		    } else if (token == TOK_EOS) {
+			# EOS is not counted as a token (do not set last_token,
+			# do not generate any output).
+			break
+
+		    } else if (streq (Memc[tokbuf], begin_comment)) {
+			in_comment = true
+			# Do not change last_token, since the comment token
+			# is to be ignored.
+			next
+
+		    } else if (token == TOK_STRING) {
+			# Convert control characters into printable
+			# sequences before printing the string token.
+			op = outstr
+			for (ip=tokbuf;  Memc[ip] != EOS;  ip=ip+1)
+			    op = op + ctocc (Memc[ip], Memc[op], SZ_LINE)
+			call printf ("\"%s\"\n")
+			    call pargstr (Memc[outstr])
+
+		    } else {		# most tokens
+			call printf ("%s\n")
+			    call pargstr (Memc[tokbuf])
+		    }
+
+		    last_token = token
+		}
+	    }
+	    call close (fd)
+	}
+
+	call clpcls (list)
+	call sfree (sp)
+end
diff --git a/pkg/bench/xctest/unique.x b/pkg/bench/xctest/unique.x
new file mode 100644
index 00000000..fcabfe00
--- /dev/null
+++ b/pkg/bench/xctest/unique.x
@@ -0,0 +1,46 @@
+# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
+
+# UNIQUE -- Pass only unique lines from the (presumably sorted) standard
+# input to the standard output.  In other words, if a sequence of identical
+# lines is found in the input, only one copy is passed to the output.
+
+procedure t_unique()
+
+int list, fd
+pointer sp, fname, old_line, new_line, temp
+bool streq()
+int getline(), clpopni(), clgfil(), clplen(), open()
+
+begin
+	call smark (sp)
+	call salloc (fname, SZ_FNAME, TY_CHAR)
+	call salloc (old_line, SZ_LINE, TY_CHAR)
+	call salloc (new_line, SZ_LINE, TY_CHAR)
+
+	list = clpopni ("files")
+
+	while (clgfil (list, Memc[fname], SZ_FNAME) != EOF) {
+	    fd = open (Memc[fname], READ_ONLY, TEXT_FILE)
+	    if (clplen (list) > 1) {
+		call printf ("\n\n==> %s <==\n")
+		    call pargstr (Memc[fname])
+	    }
+
+	    Memc[old_line] = EOS
+
+	    while (getline (fd, Memc[new_line]) != EOF) {
+		if (streq (Memc[old_line], Memc[new_line]))
+		    next
+		call putline (STDOUT, Memc[new_line])
+
+		# Swap the buffers, so that the line just output becomes
+		# the "old" line.
+		temp = old_line
+		old_line = new_line
+		new_line = temp
+	    }
+
+	    call close (fd)
+	}
+
+	call sfree (sp)
+end
diff --git a/pkg/bench/xctest/words.x b/pkg/bench/xctest/words.x
new file mode 100644
index 00000000..42f4f97e
--- /dev/null
+++ b/pkg/bench/xctest/words.x
@@ -0,0 +1,44 @@
+# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
+
+# WORDS -- Break the input up into a series of words or strings.  A word
+# is a sequence of characters delimited by whitespace or newline.  A string
+# is delimited by single or double quotes, and may not span more than a single
+# line.
+
+procedure t_words()
+
+int fd, list, last_nscan
+pointer sp, fname, word
+int clpopni(), clgfil(), fscan(), nscan(), open()
+
+begin
+	call smark (sp)
+	call salloc (fname, SZ_FNAME, TY_CHAR)
+	call salloc (word, SZ_LINE, TY_CHAR)
+
+	list = clpopni ("files")
+
+	while (clgfil (list, Memc[fname], SZ_FNAME) != EOF) {
+	    fd = open (Memc[fname], READ_ONLY, TEXT_FILE)
+
+	    # We do not know how many "words" there are on a line; get words
+	    # until there are no more.
+	    while (fscan (fd) != EOF)
+		repeat {
+		    # When nscan() does not increment after a call to
+		    # gargwrd(), we are all done.
+		    last_nscan = nscan()
+		    call gargwrd (Memc[word], SZ_LINE)
+		    if (nscan() > last_nscan) {
+			call printf ("%s\n")
+			    call pargstr (Memc[word])
+		    } else
+			break
+		}
+
+	    call close (fd)
+	}
+
+	call clpcls (list)
+	call sfree (sp)
+end
diff --git a/pkg/bench/xctest/x_lists.x b/pkg/bench/xctest/x_lists.x
new file mode 100644
index 00000000..01229e61
--- /dev/null
+++ b/pkg/bench/xctest/x_lists.x
@@ -0,0 +1,10 @@
+# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
+
+# Process configuration of the LISTS package.
+
+task table = t_table,
+	tokens = t_tokens,
+	unique = t_unique,
+	words = t_words,
+	lintran = t_lintran,
+	columns = t_columns
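+
+# (The task statement above generates the dispatcher for the compiled
+# executable x_lists.e; when the CL runs one of the named tasks, the
+# corresponding t_ procedure in this package is called.)
+#
+# A hypothetical session with the i/o benchmark tasks from x_bench.x above
+# (parameter names are those read by the tasks; the file sizes are
+# arbitrary, and the exact invocation depends on how the tasks have been
+# declared to the CL):
+#
+#	cl> wbin fname=scratch.dat filesize=10000000	# binary write
+#	cl> rbin fname=scratch.dat			# buffered binary read
+#	cl> rrbin fname=scratch.dat			# raw asynchronous read
+#	cl> wtext fname=scratch.txt filesize=1000000	# text file write
+#	cl> rtext fname=scratch.txt			# text file read
+#	cl> getpar niter=100				# IPC turnaround test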