Diffstat (limited to 'pkg/bench')
-rw-r--r--  pkg/bench/README            |    2
-rw-r--r--  pkg/bench/bench.cl          |   23
-rw-r--r--  pkg/bench/bench.hlp         | 1723
-rw-r--r--  pkg/bench/bench.ms          |  788
-rw-r--r--  pkg/bench/bench_tab.ms      |   98
-rw-r--r--  pkg/bench/fortask.cl        |   15
-rw-r--r--  pkg/bench/mkpkg             |    5
-rw-r--r--  pkg/bench/plots.cl          |   20
-rw-r--r--  pkg/bench/subproc.cl        |   18
-rw-r--r--  pkg/bench/x_bench.x         |  229
-rw-r--r--  pkg/bench/xctest/README     |    2
-rw-r--r--  pkg/bench/xctest/columns.x  |   74
-rw-r--r--  pkg/bench/xctest/lintran.x  |  370
-rw-r--r--  pkg/bench/xctest/mkpkg      |   25
-rw-r--r--  pkg/bench/xctest/table.x    |  111
-rw-r--r--  pkg/bench/xctest/tokens.x   |  140
-rw-r--r--  pkg/bench/xctest/unique.x   |   46
-rw-r--r--  pkg/bench/xctest/words.x    |   44
-rw-r--r--  pkg/bench/xctest/x_lists.x  |   10
19 files changed, 3743 insertions, 0 deletions
diff --git a/pkg/bench/README b/pkg/bench/README new file mode 100644 index 00000000..0e892171 --- /dev/null +++ b/pkg/bench/README @@ -0,0 +1,2 @@ +BENCH -- IRAF benchmarks package. Documented in the bench.hlp file in this +directory. diff --git a/pkg/bench/bench.cl b/pkg/bench/bench.cl new file mode 100644 index 00000000..9a84da27 --- /dev/null +++ b/pkg/bench/bench.cl @@ -0,0 +1,23 @@ +images +plot + +#{ BENCH -- Benchmarks package. + +package bench + +set bench = "pkg$bench/" + +task fortask = "bench$fortask.cl" +task subproc = "bench$subproc.cl" +task plots = "bench$plots.cl" + +task $ptime, + $getpar, + $wipc.bb, + $rrbin, + $rbin, + $wbin, + $rtext, + $wtext = "bench$x_bench.e" + +clbye() diff --git a/pkg/bench/bench.hlp b/pkg/bench/bench.hlp new file mode 100644 index 00000000..3b7a97b9 --- /dev/null +++ b/pkg/bench/bench.hlp @@ -0,0 +1,1723 @@ +.help bench Mar86 "IRAF Performance Tests" +.ce +\fBA Set of Benchmarks for Measuring IRAF System Performance\fR +.ce +Doug Tody +.ce +March 28, 1986 +.ce +(Revised July 1987) + +.nh +Introduction + + This set of benchmarks has been prepared with a number of purposes in mind. +Firstly, the benchmarks may be run after installing IRAF on a new system to +verify that the performance expected for that machine is actually being +achieved. In general, this cannot be taken for granted since the performance +actually achieved on a particular system can be highly dependent upon how the +system is configured and tuned. Secondly, the benchmarks may be run to compare +the performance of different IRAF hosts, or to track the system performance +over a period of time as improvements are made, both to IRAF and to the host +system. Lastly, the benchmarks provide a metric which can be used to tune +the host system. + +All too often, the only benchmarks run on a system are those which test the +execution time of optimized code generated by the host Fortran compiler. +This is primarily a hardware benchmark and secondarily a test of the Fortran +optimizer. An example of this type of test is the famous Linpack benchmark. + +The numerical execution speed test is an important benchmark but it tests only +one of the many factors contributing to the overall performance of the system +as perceived by the user. In interactive use other factors are often more +important, e.g., the time required to spawn or communicate with a subprocess, +the time required to access a file, the response of the system as the number +of users (or processes) increases, and so on. While the quality of optimized +code is a critical factor for cpu intensive batch processing, other factors +are often more important for sophisticated interactive applications. + +The benchmarks described here are designed to test, as fully as possible, +the major factors contributing to the overall performance of the IRAF system +on a particular host. A major factor in the timings of each benchmark is +of course the IRAF system itself, but comparisons of different hosts are +nonetheless possible since the code is virtually identical on all hosts. +The IRAF kernel is coded differently for each host, but the functions +performed by the kernel are identical on each host, and in most cases the +kernel operations are a negligible factor in the final timings. + +The IRAF version number, host operating system and associated version number, +and the host computer hardware configuration are all important in interpreting +the results of the benchmarks, and should always be recorded. 
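For example, on a UNIX host this information might be captured at the head
of a benchmark log with a few shell commands such as the following (a minimal
sketch only; the log file name is arbitrary, the IRAF version string must be
typed in by hand, and the exact commands are host dependent):

.nf
	% hostname > bench.log			# host name
	% cat /etc/motd >> bench.log		# OS version banner
	% echo "IRAF V2.5, 11/750+FPA, 8Mb, RA81" >> bench.log
.fi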
+ +.nh +What is Measured + + Each benchmark measures two quantities, the total cpu time required to +execute the benchmark, and the total (wall) clock time required to execute the +benchmark. If the clock time measurement is to be of any value the benchmarks +must be run on a single user system. Given this "best time" measurement, +it is not difficult to predict the performance to be expected on a loaded +system. + +The total cpu time required to execute a benchmark consists of the "user" time +plus the "system" time. The "user" time is the cpu time spent executing +the instructions comprising the user program. The "system" time is the cpu +time spent in kernel mode executing the system services called by the user +program. When possible we give both measurements, while in some cases only +the user time is given, or only the sum of the user and system times. +If the benchmark involves several concurrent processes no cpu time measurement +may be possible on some systems. The cpu time measurements are therefore +only reliable for the simpler benchmarks. + +The clock time measurement will of course include both the user and system +execution time, plus the time spent waiting for i/o. Any minor system daemon +processes executing while the benchmarks are being run may bias the clock +time measurement slightly, but since these are a constant part of the host +environment it is fair to include them in the timings. Major system daemons +which run infrequently (e.g., the print symbiont in VMS) should invalidate +the benchmark. + +A comparison of the cpu and clock times tells whether the benchmark was cpu +or i/o bound (assuming a single user system). Those benchmarks involving +compiled IRAF tasks do not include the process startup and pagein times +(these are measured by a different benchmark), hence the task should be run +once before running the benchmark to connect the subprocess and page in +the memory used by the task. A good procedure to follow is to run each +benchmark once to start the process, and then repeat the benchmark three times, +averaging the results. If inconsistent results are obtained further iterations +and/or monitoring of the host system are called for until a consistent result +is achieved. + +Many benchmarks depend upon disk performance as well as compute cycles. +For such a benchmark to be a meaningful measure of the i/o bandwidth of the +system it is essential that no other users (or batch jobs) be competing for +disk seeks on the disk used for the test file. There are subtle things to +watch out for in this regard, for example, if the machine is in a VMS cluster +or on a local area network, processes on other nodes may be accessing the +local disk, yet will not show up on a user login or process list on the local +node. It is always desirable to repeat each test several times or on several +different disk devices, to ensure that no outside requests were being serviced +while the benchmark was being run. If the system has disk monitoring utilities +use these to find an idle disk before running any benchmarks which do heavy i/o. + +Beware of disks which are nearly full; the maximum achievable i/o bandwidth +will fall off rapidly as a disk fills up, due to disk fragmentation (the file +must be stored in little pieces scattered all over the physical disk). +Similarly, many systems (VMS, AOS/VS) suffer from disk fragmentation problems +that gradually worsen as a files system ages, requiring that the disk +periodically be backed off onto tape and then restored. 
In some cases, +disk fragmentation can cause the maximum achievable i/o bandwidth to degrade +by an order of magnitude. + +.nh +The Benchmarks + + Instructions are given for running each benchmark, and the operations +performed by each benchmark are briefly described. The system characteristics +measured by the benchmark are briefly discussed. A short mnemonic name is +associated with each benchmark to identify it in the tables given in the +\fIresults\fR section. + +.nh 2 +Host Level Benchmarks + + The benchmarks discussed in this section are run at the host system level. +The examples are given for the UNIX cshell, under the assumption that a host +dependent example is better than none at all. These commands must be +translated by the user to run the benchmarks on a different system. + +.nh 3 +CL Startup/Shutdown [CLSS] + + Go to the CL login directory, mark the time (the method by which this is +done is system dependent), and startup the CL. Enter the "logout" command +while the CL is starting up so that the CL will not be idle (with the clock +running) while the command is being entered. Mark the final cpu and clock +time and compute the difference. + +.nf + % time cl + logout +.fi + +This is a complex benchmark but one which is of obvious importance to the +IRAF user. The benchmark is probably dominated by the cpu time required to +start up the CL, i.e., start up the CL process, initialize the i/o system, +initialize the environment, interpret the CL startup file, interpret the +user LOGIN.CL file, connect and disconnect the x_system.e subprocess, and so on. +Most of the remaining time is the overhead of the host operating system for +the process spawns, page faults, file accesses, and so on. + +.nh 3 +Mkpkg (verify) [MKPKGV] + + Go to the PKG directory and enter the (host system equivalent of the) +following command. The method by which the total cpu and clock times are +computed is system dependent. + +.nf + % cd $iraf/pkg + % time mkpkg -n +.fi + +This benchmark does a "no execute" make-package of the entire PKG suite of +applications and systems packages. This tests primarily the speed with which +the host system can read directories, resolve pathnames, and return directory +information for files. Since the PKG directory tree is continually growing, +this benchmark is only useful for comparing the same version of IRAF run on +different hosts, or the same version of IRAF on the same host at different +times. + +.nh 3 +Mkpkg (compile) [MKPKGC] + + Go to the directory "iraf$pkg/bench/xctest" and enter the (host system +equivalents of the) following commands. The method by which the total cpu +and clock times are computed is system dependent. Only the \fBmkpkg\fR +command should be timed. + +.nf + % cd $iraf/pkg/bench/xctest + % mkpkg clean # delete old library, etc., if present + % time mkpkg + % mkpkg clean # delete newly created binaries +.fi + +This tests the time required to compile and link a small IRAF package. +The timings reflect the time required to preprocess, compile, optimize, +and assemble each module and insert it into the package library, then link +the package executable. The host operating system overhead for the process +spawns, page faults, etc. is also a major factor. + +.nh 2 +IRAF Applications Benchmarks + + The benchmarks discussed in this section are run from within the IRAF +environment, using only standard IRAF applications tasks. 
The cpu and clock +execution times of any (compiled) IRAF task may be measured by prefixing +the task name with a $ when the command is entered, as shown in the examples. +The significance of the cpu time measurement is not precisely defined for +all systems. On a UNIX host, it is the "user" cpu time used by the task. +On a VMS host, there does not appear to be any distinction between the user +and system times (probably because the system services execute in the context +of the calling process), hence the cpu time given probably includes both. + +.nh 3 +Mkhelpdb [MKHDB] + + The \fBmkhelpdb\fR task is in the \fBsoftools\fR package. The function of +the task is to scan the tree of ".hd" help-directory files and compile the +binary help database. + +.nf + cl> softools + cl> $mkhelpdb +.fi + +This benchmark tests primarily the global optimization of the Fortran +compiler, since the code being executed is quite complex. It also tests the +speed with which text files can be opened and read. Since the size of the +help database varies with each version of IRAF, this benchmark is only useful +for comparing the same version of IRAF run on different hosts, or the same +version run on a single host at different times. + +.nh 3 +Sequential Image Operators [IMADDS,IMADDR,IMSTATR,IMSHIFTR] + + These benchmarks measure the time required by typical image operations. +All tests should be performed on 512 square test images created with the +\fBimdebug\fR package. The \fBimages\fR package will already have been +loaded by the \fBbench\fR package. Enter the following commands to create +the test images. + +.nf + cl> imdebug + cl> mktest pix.s s 2 "512 512" + cl> mktest pix.r r 2 "512 512" +.fi + +The following benchmarks should be run on these test images. Delete the +output images after each benchmark is run. Each benchmark should be run +several times, discarding the first timing and averaging the remaining +timings for the final result. +.ls +.ls [IMADDS] +cl> $imarith pix.s + 5 pix2.s +.le +.ls [IMADDR] +cl> $imarith pix.r + 5 pix2.r +.le +.ls [IMSTATR] +cl> $imstat pix.r +.le +.ls [IMSHIFTR] +cl> $imshift pix.r pix2.r .33 .44 interp=spline3 +.le +.le + +The IMADD benchmarks test the efficiency of the image i/o system, including +binary file i/o, and provide an indication of how long a simple disk to disk +image operation takes on the system in question. This benchmark should be +i/o bound on most systems. The IMSTATR and IMSHIFTR benchmarks are expected +to be cpu bound, and test primarily the quality of the code generated by the +host Fortran compiler. Note that the IMSHIFTR benchmark employs a true two +dimensional bicubic spline, hence the timings are a factor of 4 greater than +one would expect if a one dimensional interpolator were used to shift the two +dimensional image. + +.nh 3 +Image Load [IMLOAD,IMLOADF] + + To run the image load benchmarks, first load the \fBtv\fR package and +display something to get the x_display.e process into the process cache. +Run the following two benchmarks, displaying the test image PIX.S (this image +contains a test pattern of no interest). +.ls +.ls [IMLOAD] +cl> $display pix.s 1 +.le +.ls [IMLOADF] +cl> $display pix.s 1 zt=none +.le +.le + +The IMLOAD benchmark measures how long it takes for a normal image load on +the host system, including the automatic determination of the greyscale +mapping, and the time required to map and clip the image pixels into the +8 bits (or whatever) displayable by the image display. 
This benchmark +measures primarily the cpu speed and i/o bandwidth of the host system. +The IMLOADF benchmark eliminates the cpu intensive greyscale transformation, +yielding the minimum image display time for the host system. + +.nh 3 +Image Transpose [IMTRAN] + + To run this benchmark, transpose the image PIX.S, placing the output in a +new image. + + cl> $imtran pix.s pix2.s + +This benchmark tests the ability of a process to grab a large amount of +physical memory (large working set), and the speed with which the host system +can service random rather than sequential file access requests. + +.nh 2 +Specialized Benchmarks + + The next few benchmarks are implemented as tasks in the \fBbench\fR package, +located in the directory "pkg$bench". This package is not installed as a +predefined package as the standard IRAF packages are. Since this package is +used infrequently the binaries may have been deleted; if the file x_bench.e is +not present in the \fIbench\fR directory, rebuild it as follows: + +.nf + cl> cd pkg$bench + cl> mkpkg +.fi + +To load the package, enter the following commands. It is not necessary to +\fIcd\fR to the bench directory to load or run the package. + +.nf + cl> task $bench = "pkg$bench/bench.cl" + cl> bench +.fi + +This defines the following benchmark tasks. There are no manual pages for +these tasks; the only documentation is what you are reading. + +.ks +.nf + fortask - foreign task execution + getpar - get parameter; tests IPC overhead + plots - make line plots from an image + ptime - no-op task (prints the clock time) + rbin - read binary file; tests FIO bandwidth + rrbin - raw (unbuffered) binary file read + rtext - read text file; tests text file i/o speed + subproc - subprocess connect/disconnect + wbin - write binary file; tests FIO bandwidth + wipc - write to IPC; tests IPC bandwidth + wtext - write text file; tests text file i/o speed +.fi +.ke + +.nh 3 +Subprocess Connect/Disconnect [SUBPR] + + To run the SUBPR benchmark, enter the following command. +This will connect and disconnect the x_images.e subprocess 10 times. +Difference the starting and final times printed as the task output to get +the results of the benchmark. The cpu time measurement may be meaningless +(very small) on some systems. + + cl> subproc 10 + +This benchmark measures the time required to connect and disconnect an +IRAF subprocess. This includes not only the host time required to spawn +and later shutdown a process, but also the time required by the IRAF VOS +to set up the IPC channels, initialize the VOS i/o system, initialize the +environment in the subprocess, and so on. A portion of the subprocess must +be paged into memory to execute all this initialization code. The host system +overhead to spawn a subprocess and fault in a portion of its address space +is a major factor in this benchmark. + +.nh 3 +IPC Overhead [IPCO] + + The \fBgetpar\fR task is a compiled task in x_bench.e. The task will +fetch the value of a CL parameter 100 times. + + cl> $getpar 100 + +Since each parameter access consists of a request sent to the CL by the +subprocess, followed by a response from the CL process, with a negligible +amount of data being transferred in each call, this tests the IPC overhead. + +.nh 3 +IPC Bandwidth [IPCB] + + To run this benchmark enter the following command. The \fBwipc\fR task +is a compiled task in x_bench.e. + + cl> $wipc 1E6 > dev$null + +This writes approximately 1 Mb of binary data via IPC to the CL, which discards +the data (writes it to the null file via FIO). 
Since no actual disk file i/o is +involved, this tests the efficiency of the IRAF pseudofile i/o system and of the +host system IPC facility. + +.nh 3 +Foreign Task Execution [FORTSK] + + To run this benchmark enter the following command. The \fBfortask\fR +task is a CL script task in the \fBbench\fR package. + + cl> fortask 10 + +This benchmark executes the standard IRAF foreign task \fBrmbin\fR (one of the +bootstrap utilities) 10 times. The task is called with no arguments and does +nothing other than execute, print out its "usage" message, and shut down. +This tests the time required to execute a host system task from within the +IRAF environment. Only the clock time measurement is meaningful. + +.nh 3 +Binary File I/O [WBIN,RBIN,RRBIN] + + To run these benchmarks, load the \fBbench\fR package, and then enter the +following commands. The \fBwbin\fR, \fBrbin\fR and \fBrrbin\fR tasks are +compiled tasks in x_bench.e. A binary file named BINFILE is created in the +current directory by WBIN, and should be deleted after the benchmark has been +run. Each benchmark should be run at least twice before recording the time +and moving on to the next benchmark. Successive calls to WBIN will +automatically delete the file and write a new one. + +.nf + cl> $wbin binfile 5E6 + cl> $rbin binfile + cl> $rrbin binfile + cl> delete binfile # (not part of the benchmark) +.fi + +These benchmarks measure the time required to write and then read a binary disk +file approximately 5 Mb in size. This benchmark measures the binary file i/o +bandwidth of the FIO interface (for sequential i/o). In WBIN and RBIN the +common buffered READ and WRITE requests are used, hence some memory to memory +copying is included in the overhead measured by the benchmark. The RRBIN +benchmark uses ZARDBF to read the file in chunks of 32768 bytes, giving an +estimate of the maximum i/o bandwidth for the system. + +.nh 3 +Text File I/O [WTEXT,RTEXT] + + To run these benchmarks, load the \fBbench\fR package, and then enter the +following commands. The \fBwtext\fR and \fBrtext\fR tasks are compiled tasks +in x_bench.e. A text file named TEXTFILE is created in the current directory +by WTEXT, and should be deleted after the benchmarks have been run. +Successive calls to WTEXT will automatically delete the file and write a new +one. + +.nf + cl> $wtext textfile 1E6 + cl> $rtext textfile + cl> delete textfile # (not part of the benchmark) +.fi + +These benchmarks measure the time required to write and then read a text disk +file approximately one megabyte in size (15,625 64 character lines). +This benchmark measures the efficiency with which the system can sequentially +read and write text files. Since text file i/o requires the system to pack +and unpack records, text i/o tends to be cpu bound. + +.nh 3 +Network I/O [NWBIN,NRBIN,NWNULL,NWTEXT,NRTEXT] + + These benchmarks are equivalent to the binary and text file benchmarks +just discussed, except that the binary and text files are accessed on a +remote node via the IRAF network interface. The calling sequences are +identical except that an IRAF network filename is given instead of referencing +a file in the current directory. For example, the following commands would +be entered to run the network binary file benchmarks on node LYRA (the node +name and filename are site dependent). 
+ +.nf + cl> $wbin lyra!/tmp3/binfile 5E6 [NWBIN] + cl> $rbin lyra!/tmp3/binfile [NRBIN] + cl> $wbin lyra!/dev/null 5E6 [NWNULL] + cl> delete lyra!/tmp3/binfile +.fi + +The text file benchmarks are equivalent with the obvious changes, i.e., +substitute "text" for "bin", "textfile" for "binfile", and omit the null +textfile benchmark. The type of network interface used (TCP/IP, DECNET, etc.), +and the characteristics of the remote node should be recorded. + +These benchmarks test the bandwidth of the IRAF network interfaces for binary +and text files, as well as the limiting speed of the network itself (NWNULL). +The binary file benchmarks should be i/o bound. NWBIN should outperform +NRBIN since a network write is a pipelined operation, whereas a network read +is (currently) a synchronous operation. Text file access may be either cpu +or i/o bound depending upon the relative speeds of the network and host cpus. +The IRAF network interface buffers textfile i/o to minimize the number of +network packets and maximize the i/o bandwidth. + +.nh 3 +Task, IMIO, GIO Overhead [PLOTS] + + The \fBplots\fR task is a CL script task which calls the \fBprow\fR task +repeatedly to plot the same line of an image. The graphics output is +discarded (directed to the null file) rather than plotted since otherwise +the results of the benchmark would be dominated by the plotting speed of the +graphics terminal. + + cl> plots pix.s 10 + +This is a complex benchmark. The benchmark measures the overhead of task +(not process) execution and the overhead of the IMIO and GIO subsystems, +as well as the speed with which IPC can be used to pass parameters to a task +and return the GIO graphics metacode to the CL. + +The \fBprow\fR task is all overhead and is not normally used to interactively +plot image lines (\fBimplot\fR is what is normally used), but it is a good +task to use for a benchmark since it exercises the subsystems most commonly +used in scientific tasks. The \fBprow\fR task has a couple dozen parameters +(mostly hidden), must open the image to read the image line to be plotted +on every call, and must open the GIO graphics device on every call as well. + +.nh 3 +System Loading [2USER,4USER] + + This benchmark attempts to measure the response of the system as the +load increases. This is done by running large \fBplots\fR jobs on several +terminals and then repeating the 10 plots \fBplots\fR benchmark. +For example, to run the 2USER benchmark, login on a second terminal and +enter the following command, and then repeat the PLOTS benchmark discussed +in the last section. Be sure to use a different login or login directory +for each "user", to avoid concurrency problems, e.g., when reading the +input image or updating parameter files. + + cl> plots pix.s 9999 + +Theoretically, the timings should be approximately .5 (2USER) and .25 (4USER) +as fast as when the PLOTS benchmark was run on a single user system, assuming +that cpu time is the limiting resource and that a single job is cpu bound. +In a case where there is more than one limiting resource, e.g., disk seeks as +well as cpu cycles, performance will fall off more rapidly. If, on the other +hand, a single user process does not keep the system busy, e.g., because +synchronous i/o is used, performance will fall off less rapidly. If the +system unexpectedly runs out of some critical system resource, e.g., physical +memory or some internal OS buffer space, performance may be much worse than +expected. 
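As a worked example (timings taken from the UNIX 11/750 entry in Appendix 1):
the single user PLOTS benchmark ran in 29 seconds of clock time, so the
scaling argument above predicts roughly 58 and 116 seconds for the 2USER and
4USER cases if a single job were purely cpu bound.

.nf
	PLOTS (1 user)	0:29	measured
	2USER		0:44	predicted 2 x 0:29 = 0:58
	4USER		1:19	predicted 4 x 0:29 = 1:56
.fi

The measured times fall off less rapidly than the naive prediction,
suggesting that a single PLOTS job does not keep that machine fully busy.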
+ +If the multiuser performance is poorer than expected it may be possible to +improve the system performance significantly once the reason for the poor +performance is understood. If disk seeks are the problem it may be possible +to distribute the load more evenly over the available disks. If the +performance decays linearly as more users are added and then gets really bad, +it is probably because some critical system resource has run out. Use the +system monitoring tools provided with the host operating system to try to +identify the critical resource. It may be possible to modify the system +tuning parameters to fix the problem, once the critical resource has been +identified. + +.nh +Interpreting the Benchmark Results + + Many factors determine the timings obtained when the benchmarks are run +on a system. These factors include all of the following: + +.ls +.ls o +The hardware configuration, e.g., cpu used, clock speed, availability of +floating point hardware, type of floating point hardware, amount of memory, +number and type of disks, degree of fragmentation of the disks, bus bandwidth, +disk controller bandwidth, memory controller bandwidth for memory mapped DMA +transfers, and so on. +.le +.ls o +The host operating system, including the version number, tuning parameters, +user quotas, working set size, files system parameters, Fortran compiler +characteristics, level of optimization used to compile IRAF, and so on. +.le +.ls o +The version of IRAF being run. On a VMS system, are the images "installed" +to permit shared memory and reduce physical memory usage? Were the programs +compiled with the code optimizer, and if so, what compiler options were used? +Are shared libraries used if available on the host system? +.le +.ls o +Other activity in the system when the benchmarks were run. If there were no +other users on the machine at the time, how about batch jobs? If the machine +is on a cluster or network, were other nodes accessing the same disks? +How many other processes were running on the local node? Ideally, the +benchmarks should be run on an otherwise idle system, else the results may be +meaningless or next to impossible to interpret. Given some idea of how the +host system responds to loading, it is possible to estimate how a timing +will scale as the system is loaded, but the reverse operation is much more +difficult. +.le +.le + + +Because so many factors contribute to the results of a benchmark, it can be +difficult to draw firm conclusions from any benchmark, no matter how simple. +The hardware and software in modern computer systems is so complicated that +it is difficult even for an expert with a detailed knowledge and understanding +of the full system to explain in detail where the time is going, even when +running the simplest benchmark. On some recent message based multiprocessor +systems it is probably impossible to fully comprehend what is going on at any +given time, even if one fully understands how the system works, because of the +dynamic nature of such systems. + +Despite these difficulties, the benchmarks do provide a coarse measure of the +relative performance of different host systems, as well as some indication of +the efficiency of the IRAF VOS. The benchmarks are designed to measure the +performance of the \fIhost system\fR (both hardware and software) in a number +of important areas, all of which play a role in determining the suitability of +a system for scientific data processing. 
The benchmarks are \fInot\fR +designed to measure the efficiency of the IRAF software itself (except parts +of the VOS), e.g., there is no measure of the time taken by the CL to compile +and execute a script, no measure of the speed of the median algorithm or of +an image transpose, and so on. These timings are also important, of course, +but should be measured separately. Also, measurements of the efficiency of +individual applications programs are much less critical than the performance +criteria dealt with here, since it is relatively easy to optimize an +inefficient or poorly designed applications program, even a complex one like +the CL, but there is generally little one can do about the host system. + +The timings for the benchmarks for a number of host systems are given in the +appendices which follow. Sometimes there will be more than one set of +benchmarks for a given host system, e.g., because the system provided two or +more disks or floating point options with different levels of performance. +The notes at the end of each set of benchmarks are intended to document any +special features or problems of the host system which may have affected the +results. In general we did not bother to record things like system tuning +parameters, working set, page faults, etc., unless these were considered an +important factor in the benchmarks. In particular, few IRAF programs page +fault other than during process startup, hence this is rarely a significant +factor when running these benchmarks (except possibly in IMTRAN). + +Detailed results for each configuration of each host system are presented on +separate pages in the Appendices. A summary table showing the results of +selected benchmarks for all host systems at once is also provided. +The system characteristic or characteristics principally measured by each +benchmark is noted in the table below. This is only approximate, e.g., the +MIPS rating is a significant factor in all but the most i/o bound benchmarks. + +.ks +.nf + benchmark responsiveness mips flops i/o + + CLSS * + MKPKGV * + MKHDB * * + PLOTS * * + IMADDS * * + IMADDR * * + IMSTATR * + IMSHIFTR * + IMTRAN * + WBIN * + RBIN * +.fi +.ke + + +By \fIresponsiveness\fR we refer to the interactive response of the system +as perceived by the user. A system with a good interactive response will do +all the little things very fast, e.g., directory listings, image header +listings, plotting from an image, loading new packages, starting up a new +process, and so on. Machines which score high in this area will seem fast +to the user, whereas machines which score poorly will \fIseem\fR slow, +sometimes frustratingly slow, even though they may score high in the areas +of floating point performance, or i/o bandwidth. The interactive response +of a system obviously depends upon the MIPS rating of the system (see below), +but an often more significant factor is the design and computational complexity +of the host operating system itself, in particular the time taken by the host +operating system to execute system calls. Any system which spends a large +fraction of its time in kernel mode will probably have poor interactive +response. The response of the system to loading is also very important, +i.e., if the system has trouble with load balancing as the number of users +(or processes) increases, response will become increasingly erratic until the +interactive response is hopelessly poor. 
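The differences show up clearly in the responsiveness related benchmarks.
For example, comparing the clock timings reported in Appendix 1 for two of
the hosts tested:

.nf
	host			CLSS	SUBPR		FORTSK
	VAX 11/750 (UNIX)	0:17	2.0 sec/proc	0.6 sec/cmd
	SUN 3/160C		0:03	0.7 sec/proc	0.2 sec/cmd
.fi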
+ +The MIPS column refers to the raw speed of the system when executing arbitrary +code containing a mixture of various types of instructions, but little floating +point, i/o, or system calls. A machine with a high MIPS rating will have a +fast cpu, e.g., a fast clock rate, fast memory access time, large cache memory, +and so on, as well as a good optimizing Fortran compiler. Assuming good +compilers, the MIPS rating is primarily a measure of the hardware speed of +the host machine, but all of the MIPS related benchmarks presented here also +make a significant number of system calls (MKHDB, for example, does a lot of +files accesses and text file i/o), hence it is not that simple. Perhaps a +completely cpu bound pure-MIPS benchmark should be added to our suite of +benchmarks (the MIPS rating of every machine is generally well known, however). + +The FLOPS column identifies those benchmarks which do a significant amount of +floating point computation. The IMSHIFTR and IMSTATR benchmarks in particular +are heavily into floating point. These benchmarks measure the single +precision floating point speed of the host system hardware, as well as the +effectiveness of do-loop optimization by the host Fortran compiler. +The degree of optimization provided by the Fortran compiler can affect the +timing of these benchmarks by up to a factor of two. Note that the sample is +very small, and if a compiler fails to optimize the inner loop of one of these +benchmark programs, the situation may be reversed when running some other +benchmark. Any reasonable Fortran compiler should be able to optimize the +inner loop of the IMADDR benchmark, so the CPU timing for this benchmark is +a good measure of the hardware floating point speed, if one allows for do-loop +overhead, memory i/o, and the system calls necessary to access the image on +disk. + +The I/O column identifies those benchmarks which are i/o bound and which +therefore provide some indication of the i/o bandwidth of the host system. +The i/o bandwidth actually achieved in these benchmarks depends upon +many factors, the most important of which are the host operating system +software (files system data structures and i/o software, disk drivers, etc.) +and the host system hardware, i.e., disk type, disk controller type, bus +bandwidth, and DMA memory controller bandwidth. Note that asynchronous i/o +is not currently used in these benchmarks, hence higher transfer rates are +probably possible in special cases (on a busy system all i/o is asynchronous +at the host system level anyway). Large transfers are used to minimize disk +seeks and synchronization delays, hence the benchmarks should provide a good +measure of the realistically achievable host i/o bandwidth. + +.bp + . +.sp 20 +.ce +APPENDIX 1. IRAF VERSION 2.5 BENCHMARKS +.ce +April-June 1987 + +.bp +.sh +UNIX/IRAF V2.5 4.3BSD UNIX, 8Mb memory, VAX 11/750+FPA RA81 (lyra) +.br +CPU times are given in seconds, CLK times in minutes and seconds. +.br +Wednesday, 1 April, 1987, Suzanne H. Jacoby, NOAO/Tucson + +.nf +\fBBenchmark CPU CLK Size Notes \fR + (user+sys) (m:ss) + +CLSS 7.4+2.6 0:17 CPU = user + system +MKPKGV 13.4+9.9 0:39 CPU = user + system +MKPKGC 135.1+40. 
3:46 CPU = user + system +MKHDB 22.79 0:40 [1] +IMADDS 3.31 0:10 512X512X16 +IMADDR 4.28 0:17 512X512X32 +IMSTATR 10.98 0:15 512X512X32 +IMSHIFTR 114.41 2:13 512X512X32 +IMLOAD 7.62 0:15 512X512X16 +IMLOADF 2.63 0:08 512X512X16 +IMTRAN 10.19 0:17 512X512X16 +SUBPR n/a 0:20 10 conn/discon 2.0 sec/proc +IPCO 0.92 0:07 100 getpars +IPCB 2.16 0:15 1E6 bytes 66.7 Kb/sec +FORTSK n/a 0:06 10 commands 0.6 sec/cmd +WBIN 4.32 0:24 5E6 bytes 208.3 Kb/sec +RBIN 4.08 0:24 5E6 bytes 208.3 Kb/sec +RRBIN 0.12 0:22 5E6 bytes 227.3 Kb/sec +WTEXT 37.30 0:42 1E6 bytes 23.8 Kb/sec +RTEXT 26.49 0:32 1E6 bytes 31.3 Kb/sec +NWBIN 4.64 1:43 5E6 bytes 48.5 Kb/sec [2] +NRBIN 6.49 1:34 5E6 bytes 53.2 Kb/sec [2] +NWNULL 4.91 1:21 5E6 bytes 61.7 Kb/sec [2] +NWTEXT 44.03 1:02 1E6 bytes 16.1 Kb/sec [2] +NRTEXT 31.38 2:04 1E6 bytes 8.1 Kb/sec [2] +PLOTS n/a 0:29 10 plots 2.9 sec/PROW +2USER n/a 0:44 10 plots 4.4 sec/PROW +4USER n/a 1:19 10 plots 7.9 sec/PROW +.fi + + +Notes: +.ls [1] +All cpu timings from MKHDB on do not include the "system" time. +.le +.ls [2] +The remote node used for the network tests was aquila, a VAX 11/750 running +4.3 BSD UNIX. The network protocol used was TCP/IP. +.le + +.bp +.sh +UNIX/IRAF V2.5 SUN UNIX 3.3, SUN 3/160C, (tucana) +.br +16 MHz 68020, 68881 fpu, 8Mb, 2-380Mb Fujitsu Eagle disks +.br +Friday, June 12, 1987, Suzanne H. Jacoby, NOAO/Tucson + +.nf +\fBBenchmark CPU CLK Size Notes \fR + (user+sys) (m:ss) + +CLSS 2.0+0.8 0:03 CPU = user + system +MKPKGV 3.2+4.5 0:17 CPU = user + system +MKPKGC 59.1+26.2 2:13 CPU = user + system +MKHDB 5.26 0:10 [1] +IMADDS 0.62 0:03 512X512X16 +IMADDR 3.43 0:09 512X512X32 +IMSTATR 8.38 0:11 512X512X32 +IMSHIFTR 83.44 1:33 512X512X32 +IMLOAD 6.78 0:11 512X512X16 +IMLOADF 1.21 0:03 512X512X16 +IMTRAN 1.47 0:05 512X512X16 +SUBPR n/a 0:07 10 conn/discon 0.7 sec/proc +IPCO 0.16 0:02 100 getpars +IPCB 0.70 0:05 1E6 bytes 200.0 Kb/sec +FORTSK n/a 0:02 10 commands 0.2 sec/cmd +WBIN 2.88 0:08 5E6 bytes 625.0 Kb/sec +RBIN 2.58 0:11 5E6 bytes 454.5 Kb/sec +RRBIN 0.01 0:10 5E6 bytes 500.0 Kb/sec +WTEXT 9.20 0:10 1E6 bytes 100.0 Kb/sec +RTEXT 6.75 0:07 1E6 bytes 142.8 Kb/sec +NWBIN 2.65 1:04 5E6 bytes 78.1 Kb/sec [2] +NRBIN 3.42 1:16 5E6 bytes 65.8 Kb/sec [2] +NWNULL 2.64 1:01 5E6 bytes 82.0 Kb/sec [2] +NWTEXT 11.92 0:39 1E6 bytes 25.6 Kb/sec [2] +NRTEXT 7.41 1:24 1E6 bytes 11.9 Kb/sec [2] +PLOTS n/a 0:09 10 plots 0.9 sec/PROW +2USER n/a 0:16 10 plots 1.6 sec/PROW +4USER n/a 0:35 10 plots 3.5 sec/PROW +.fi + + +Notes: +.ls [1] +All timings from MKHDB on do not include the "system" time. +.le +.ls [2] +The remote node used for the network tests was aquila, a VAX 11/750 +running 4.3BSD UNIX. The network protocol used was TCP/IP. +.le + +.bp +.sh +UNIX/IRAF V2.5 SUN UNIX 3.3, SUN 3/160C + FPA (KPNO 4 meter system) +.br +16 MHz 68020, Sun-3 FPA, 8Mb, 2-380Mb Fujitsu Eagle disks +.br +Friday, June 12, 1987, Suzanne H. 
Jacoby, NOAO/Tucson + +.nf +\fBBenchmark CPU CLK Size Notes \fR + (user+sys) (m:ss) + +CLSS 1.9+0.7 0:04 CPU = user + system +MKPKGV 3.1+3.9 0:19 CPU = user + system +MKPKGC 66.2+20.3 2:06 CPU = user + system +MKHDB 5.30 0:11 [1] +IMADDS 0.63 0:03 512X512X16 +IMADDR 0.86 0:06 512X512X32 +IMSTATR 5.08 0:08 512X512X32 +IMSHIFTR 31.06 0:36 512X512X32 +IMLOAD 2.76 0:06 512X512X16 +IMLOADF 1.22 0:03 512X512X16 +IMTRAN 1.46 0:04 512X512X16 +SUBPR n/a 0:06 10 conn/discon 0.6 sec/proc +IPCO 0.16 0:01 100 getpars +IPCB 0.60 0:05 1E6 bytes 200.0 Kb/sec +FORTSK n/a 0:02 10 commands 0.2 sec/cmd +WBIN 2.90 0:07 5E6 bytes 714.3 Kb/sec +RBIN 2.54 0:11 5E6 bytes 454.5 Kb/sec +RRBIN 0.03 0:10 5E6 bytes 500.0 Kb/sec +WTEXT 9.20 0:11 1E6 bytes 90.9 Kb/sec +RTEXT 6.70 0:08 1E6 bytes 125.0 Kb/sec +NWBIN n/a +NRBIN n/a [3] +NWNULL n/a +NWTEXT n/a +NRTEXT n/a +PLOTS n/a 0:06 10 plots 0.6 sec/PROW +2USER n/a 0:10 10 plots 1.0 sec/PROW +4USER n/a 0:26 10 plots 2.6 sec/PROW +.fi + + +Notes: +.ls [1] +All timings from MKHDB on do not include the "system" time. +.le + +.bp +.sh +UNIX/IRAF V2.5, SUN UNIX 3.2, SUN 3/160 (taurus) +.br +16 MHz 68020, Sun-3 FPA, 16 Mb, SUN SMD disk 280 Mb +.br +7 April 1987, Skip Schaller, Steward Observatory, University of Arizona + +.nf +\fBBenchmark CPU CLK Size Notes\fR + (user+sys) (m:ss) + +CLSS 01.2+01.1 0:03 +MKPKGV 03.2+10.1 0:18 +MKPKGC 65.4+25.7 2:03 +MKHDB 5.4 0:18 +IMADDS 0.6 0:04 512x512x16 +IMADDR 0.9 0:07 512x512x32 +IMSTATR 11.4 0:13 512x512x32 +IMSHIFTR 30.1 0:34 512x512x32 +IMLOAD (not available) +IMLOADF (not available) +IMTRAN 1.4 0:04 512x512x16 +SUBPR - 0:07 10 conn/discon 0.7 sec/proc +IPCO 0.1 0:02 100 getpars +IPCB 0.8 0:05 1E6 bytes 200.0 Kb/sec +FORTSK - 0:03 10 commands 0.3 sec/cmd +WBIN 2.7 0:14 5E6 bytes 357.1 Kb/sec +RBIN 2.5 0:09 5E6 bytes 555.6 Kb/sec +RRBIN 0.1 0:06 5E6 bytes 833.3 Kb/sec +WTEXT 9.0 0:10 1E6 bytes 100.0 Kb/sec +RTEXT 6.4 0:07 1E6 bytes 142.9 Kb/sec +NWBIN 2.8 1:08 5E6 bytes 73.5 Kb/sec +NRBIN 3.1 1:25 5E6 bytes 58.8 Kb/sec +NWNULL 2.7 0:55 5E6 bytes 90.9 Kb/sec +NWTEXT 12.3 0:44 1E6 bytes 22.7 Kb/sec +NRTEXT 7.7 1:45 1E6 bytes 9.5 Kb/sec +PLOTS - 0:07 10 plots 0.7 sec/PROW +2USER - 0:13 +4USER - 0:35 +.fi + + +Notes: +.ls [1] +The remote node used for the network tests was carina, a VAX 11/750 +running 4.3 BSD UNIX. The network protocol used was TCP/IP. 
.le

.bp
.sh
Integrated Solutions (ISI), Lick Observatory
.br
16-Mhz 68020, 16-Mhz 68881 fpu, 8Mb Memory
.br
IRAF compiled with Greenhills compilers without -O optimization
.br
Thursday, 14 May, 1987, Richard Stover, Lick Observatory

.nf
\fBBenchmark     CPU         CLK     Size            Notes\fR
               (user+sys)  (m:ss)

CLSS           1.6+0.7     0:03
MKPKGV         3.1+4.6     0:25
MKPKGC         40.4+11.6   1:24
MKHDB          6.00        0:17
IMADDS         0.89        0:05    512X512X16
IMADDR         3.82        0:10    512X512X32
IMSTATR        7.77        0:10    512X512X32
IMSHIFTR       81.60       1:29    512X512X32
IMLOAD         n/a
IMLOADF        n/a
IMTRAN         1.62        0:06    512X512X16
SUBPR          n/a         0:05    10 conn/discon  0.5 sec/proc
IPCO           0.27        0:02    100 getpars
IPCB           1.50        0:08    1E6 bytes       125.0 Kb/sec
FORTSK         n/a         0:13    10 commands     1.3 sec/cmd
WBIN           4.82        0:17    5E6 bytes       294.1 Kb/sec
RBIN           4.63        0:18    5E6 bytes       277.8 Kb/sec
RRBIN          0.03        0:13    5E6 bytes       384.6 Kb/sec
WTEXT          17.10       0:19    1E6 bytes       45.5 Kb/sec
RTEXT          7.40        0:08    1E6 bytes       111.1 Kb/sec
NWBIN          n/a
NRBIN          n/a
NWNULL         n/a
NWTEXT         n/a
NRTEXT         n/a
PLOTS          n/a         0:10    10 plots        1.0 sec/PROW
2USER          n/a
4USER          n/a
.fi


Notes:
.ls [1]
An initial attempt to bring IRAF up on the ISI using the ISI C and Fortran
compilers failed due to there being too many bugs in these compilers, so
the system was brought up using the Greenhills compilers.
.le

.bp
.sh
ULTRIX/IRAF V2.5, ULTRIX 1.2, VAXStation II/GPX (gll1)
.br
5Mb memory, 150 Mb RD54 disk
.br
Thursday, 21 May, 1987, Suzanne H. Jacoby, NOAO/Tucson

.nf
\fBBenchmark     CPU         CLK     Size            Notes\fR
               (user+sys)  (m:ss)

CLSS           4.2+1.8     0:09    CPU = user + system
MKPKGV         9.8+6.1     0:37    CPU = user + system
MKPKGC         96.8+24.4   3:15    CPU = user + system
MKHDB          15.50       0:38    [1]
IMADDS         2.06        0:09    512X512X16
IMADDR         2.98        0:17    512X512X32
IMSTATR        10.98       0:16    512X512X32
IMSHIFTR       95.61       1:49    512X512X32
IMLOAD         6.90        0:17    512X512X16      [2]
IMLOADF        2.58        0:10    512X512X16      [2]
IMTRAN         4.93        0:16    512X512X16
SUBPR          n/a         0:19    10 conn/discon  1.9 sec/proc
IPCO           0.47        0:03    100 getpars
IPCB           1.21        0:07    1E6 bytes       142.9 Kb/sec
FORTSK         n/a         0:08    10 commands     0.8 sec/cmd
WBIN           1.97        0:29    5E6 bytes       172.4 Kb/sec
RBIN           1.73        0:24    5E6 bytes       208.3 Kb/sec
RRBIN          0.08        0:24    5E6 bytes       208.3 Kb/sec
WTEXT          25.43       0:27    1E6 bytes       37.0 Kb/sec
RTEXT          16.65       0:18    1E6 bytes       55.5 Kb/sec
NWBIN          2.24        1:26    5E6 bytes       58.1 Kb/sec   [3]
NRBIN          2.66        1:43    5E6 bytes       48.5 Kb/sec   [3]
NWNULL         2.22        2:21    5E6 bytes       35.5 Kb/sec   [3]
NWTEXT         27.16       2:43    1E6 bytes       6.1 Kb/sec    [3]
NRTEXT         17.44       2:17    1E6 bytes       7.3 Kb/sec    [3]
PLOTS          n/a         0:20    10 plots        2.0 sec/PROW
2USER          n/a         0:30    10 plots        3.0 sec/PROW
4USER          n/a         0:51    10 plots        5.1 sec/PROW
.fi


Notes:
.ls [1]
All cpu timings from MKHDB on do not include the "system" time.
.le
.ls [2]
Since there is no image display on this node, the image display benchmarks
were run using the IIS display on node lyra via the network interface.
.le
.ls [3]
The remote node used for the network tests was lyra, a VAX 11/750 running
4.3 BSD UNIX. The network protocol used was TCP/IP.
.le
.ls [4]
Much of the hardware and software for this system was provided courtesy of
DEC so that we may better support IRAF on the microvax.
.le

.bp
.sh
VMS/IRAF V2.5, VMS V4.5, 28Mb, VAX 8600 RA81/Clustered (draco)
.br
Friday, 15 May, 1987, Suzanne H. Jacoby, NOAO/Tucson

.nf
\fBBenchmark     CPU         CLK     Size            Notes\fR
               (user+sys)  (m:ss)

CLSS           2.87        0:08
MKPKGV         33.57       1:05
MKPKGC         3.26        1:16
MKHDB          8.59        0:17
IMADDS         1.56        0:05    512X512X16
IMADDR         1.28        0:07    512X512X32
IMSTATR        2.09        0:04    512X512X32
IMSHIFTR       13.54       0:32    512X512X32
IMLOAD         2.90        0:10    512X512X16      [1]
IMLOADF        1.04        0:08    512X512X16      [1]
IMTRAN         2.58        0:06    512X512X16
SUBPR          n/a         0:27    10 conn/discon  2.7 sec/proc
IPCO           0.00        0:02    100 getpars
IPCB           0.04        0:06    1E6 bytes       166.7 Kb/sec
FORTSK         n/a         0:13    10 commands     1.3 sec/cmd
WBIN           1.61        0:17    5E6 bytes       294.1 Kb/sec
RBIN           1.07        0:08    5E6 bytes       625.0 Kb/sec
RRBIN          0.34        0:08    5E6 bytes       625.0 Kb/sec
WTEXT          10.62       0:17    1E6 bytes       58.8 Kb/sec
RTEXT          4.64        0:06    1E6 bytes       166.7 Kb/sec
NWBIN          2.56        2:00    5E6 bytes       41.7 Kb/sec   [2]
NRBIN          5.67        1:57    5E6 bytes       42.7 Kb/sec   [2]
NWNULL         2.70        1:48    5E6 bytes       46.3 Kb/sec   [2]
NWTEXT         12.06       0:47    1E6 bytes       21.3 Kb/sec   [2]
NRTEXT         10.10       1:41    1E6 bytes       9.9 Kb/sec    [2]
PLOTS          n/a         0:09    10 plots        0.9 sec/PROW
2USER          n/a         0:10    10 plots        1.0 sec/PROW
4USER          n/a         0:18    10 plots        1.8 sec/PROW
.fi


Notes:
.ls [1]
The image display was accessed via the network (IRAF TCP/IP network interface,
Wollongong TCP/IP package for VMS), with the IIS image display residing on
node lyra and accessed via a UNIX/IRAF kernel server. The binary and text
file network tests also used lyra as the remote node.
.le
.ls [2]
The remote node for network benchmarks was aquila, a VAX 11/750 running
4.3BSD UNIX. Connection made via TCP/IP.
.le
.ls [3]
The system was linked using shared libraries and the IRAF executables for
the cl and system tasks, as well as the shared library, were "installed"
using the VMS INSTALL utility.
.le
.ls [4]
The high value of the IPC bandwidth for VMS is due to the use of shared
memory. Mailboxes were considerably slower and are no longer used.
.le
.ls [5]
The foreign task interface uses mailboxes to talk to a DCL run as a
subprocess and should be considerably faster than it is. It is slow at
present due to the need to call SET MESSAGE before and after the user
command to disable pointless DCL error messages having to do with
logical names.
.le

.bp
.sh
VMS/IRAF V2.5, VAX 11/780, VMS V4.5, 16Mb memory, RA81 disks (wfpct1)
.br
Tuesday, 19 May, 1987, Suzanne H. Jacoby, NOAO/Tucson

.nf
\fBBenchmark     CPU         CLK     Size            Notes\fR
               (user+sys)  (m:ss)

CLSS           7.94        0:15
MKPKGV         102.49      2:09
MKPKGC         9.50        2:22
MKHDB          26.10       0:31
IMADDS         3.57        0:10    512X512X16
IMADDR         4.22        0:17    512X512X32
IMSTATR        6.78        0:10    512X512X32
IMSHIFTR       45.11       0:57    512X512X32
IMLOAD         n/a
IMLOADF        n/a
IMTRAN         7.83        0:14    512X512X16
SUBPR          n/a         0:53    10 conn/discon  5.3 sec/proc
IPCO           0.02        0:03    100 getpars
IPCB           0.17        0:10    1E6 bytes       100.0 Kb/sec
FORTSK         n/a         0:20    10 commands     2.0 sec/cmd
WBIN           4.52        0:30    5E6 bytes       166.7 Kb/sec
RBIN           3.90        0:19    5E6 bytes       263.2 Kb/sec
RRBIN          1.23        0:17    5E6 bytes       294.1 Kb/sec
WTEXT          37.99       0:50    1E6 bytes       20.0 Kb/sec
RTEXT          18.52       0:19    1E6 bytes       52.6 Kb/sec
NWBIN          n/a
NRBIN          n/a
NWNULL         n/a
NWTEXT         n/a
NRTEXT         n/a
PLOTS          n/a         0:19    10 plots        1.9 sec/PROW
2USER          n/a         0:31    10 plots        3.1 sec/PROW
4USER          n/a         1:04    10 plots        6.4 sec/PROW
.fi


Notes:
.ls [1]
The Unibus interface used for the RA81 disks for these benchmarks is
notoriously slow, hence the i/o bandwidth of the system as tested was
probably significantly worse than many sites would experience (using
disks on the faster Massbus interface).
.le

.bp
.sh
VMS/IRAF V2.5, VAX 11/780, VMS V4.5 (wfpct1)
.br
16Mb memory, IRAF installed on RA81 disks, data on RM03/Massbus [1].
.br
Tuesday, 9 June, 1987, Suzanne H. Jacoby, NOAO/Tucson

.nf
\fBBenchmark     CPU         CLK     Size            Notes\fR
               (user+sys)  (m:ss)

CLSS           n/a
MKPKGV         n/a
MKPKGC         n/a
MKHDB          n/a
IMADDS         3.38        0:08    512X512X16
IMADDR         4.00        0:11    512X512X32
IMSTATR        6.88        0:08    512X512X32
IMSHIFTR       45.47       0:53    512X512X32
IMLOAD         n/a
IMLOADF        n/a
IMTRAN         7.71        0:12    512X512X16
SUBPR          n/a
IPCO           n/a
IPCB           n/a
FORTSK         n/a
WBIN           4.22        0:22    5E6 bytes       227.3 Kb/sec
RBIN           3.81        0:12    5E6 bytes       416.7 Kb/sec
RRBIN          0.98        0:09    5E6 bytes       555.6 Kb/sec
WTEXT          37.20       0:47    1E6 bytes       21.3 Kb/sec
RTEXT          17.95       0:18    1E6 bytes       55.6 Kb/sec
NWBIN          n/a
NRBIN          n/a
NWNULL         n/a
NWTEXT         n/a
NRTEXT         n/a
PLOTS          n/a         0:16    10 plots        1.6 sec/PROW
2USER
4USER
.fi

Notes:
.ls [1]
The data files were stored on an RM03 with 23 free Mb and a Massbus interface
for these benchmarks. Only those benchmarks which access the RM03 are given.
.le

.bp
.sh
VMS/IRAF V2.5, MicroVMS 4.5, VAXStation II/GPX (gll1)
.br
5Mb memory, 70Mb RD53 plus 300 Mb Maxstor with Emulex controller.
.br
Wednesday, 13 May, 1987, Suzanne H. Jacoby, NOAO/Tucson

.nf
\fBBenchmark     CPU         CLK     Size            Notes\fR
               (user+sys)  (m:ss)

CLSS           9.66        0:17
MKPKGV         109.26      2:16
MKPKGC         9.25        2:53
MKHDB          27.58       0:39
IMADDS         3.51        0:07    512X512X16
IMADDR         4.31        0:10    512X512X32
IMSTATR        9.31        0:11    512X512X32
IMSHIFTR       74.54       1:21    512X512X32
IMLOAD         n/a
IMLOADF        n/a
IMTRAN         10.81       0:27    512X512X16
SUBPR          n/a         0:53    10 conn/discon  5.3 sec/proc
IPCO           0.03        0:03    100 getpars
IPCB           0.13        0:07    1E6 bytes       142.8 Kb/sec
FORTSK         n/a         0:29    10 commands     2.9 sec/cmd
WBIN           3.29        0:16    5E6 bytes       312.5 Kb/sec
RBIN           2.38        0:10    5E6 bytes       500.0 Kb/sec
RRBIN          0.98        0:09    5E6 bytes       555.5 Kb/sec
WTEXT          41.00       0:53    1E6 bytes       18.9 Kb/sec
RTEXT          28.74       0:29    1E6 bytes       34.5 Kb/sec
NWBIN          8.28        0:46    5E6 bytes       108.7 Kb/sec  [1]
NRBIN          5.66        0:50    5E6 bytes       100.0 Kb/sec  [1]
NWNULL         8.39        0:42    5E6 bytes       119.0 Kb/sec  [1]
NWTEXT         30.21       0:33    1E6 bytes       30.3 Kb/sec   [1]
NRTEXT         20.05       0:38    1E6 bytes       26.3 Kb/sec   [1]
PLOTS                      0:16    10 plots        1.6 sec/plot
2USER                      0:26    10 plots        2.6 sec/plot
4USER
.fi

Notes:
.ls [1]
The remote node for the network tests was draco, a VAX 8600 running
V4.5 VMS. The network protocol used was DECNET.
.le
.ls [2]
Much of the hardware and software for this system was provided courtesy of
DEC so that we may better support IRAF on the microvax.
.le

.bp
.sh
VMS/IRAF V2.5, MicroVMS 4.5, VAXStation II/GPX (gll1)
.br
5 Mb memory, IRAF on 300 Mb Maxstor/Emulex, data on 70 Mb RD53 [1].
.br
Sunday, 31 May, 1987, Suzanne H. Jacoby, NOAO/Tucson.

.nf
\fBBenchmark     CPU         CLK     Size            Notes\fR
               (user+sys)  (m:ss)

CLSS           n/a         n/a
MKPKGV         n/a         n/a
MKPKGC         n/a         n/a
MKHDB          n/a         n/a
IMADDS         3.44        0:07    512X512X16
IMADDR         4.31        0:15    512X512X32
IMSTATR        9.32        0:12    512X512X32
IMSHIFTR       74.72       1:26    512X512X32
IMLOAD         n/a
IMLOADF        n/a
IMTRAN         10.83       0:35    512X512X16
SUBPR          n/a
IPCO           n/a
IPCB           n/a
FORTSK         n/a
WBIN           3.33        0:26    5E6 bytes       192.3 Kb/sec
RBIN           2.30        0:17    5E6 bytes       294.1 Kb/sec
RRBIN          0.97        0:11    5E6 bytes       294.1 Kb/sec
WTEXT          40.84       0:54    1E6 bytes       18.2 Kb/sec
RTEXT          27.99       0:28    1E6 bytes       35.7 Kb/sec
NWBIN          n/a
NRBIN          n/a
NWNULL         n/a
NWTEXT         n/a
NRTEXT         n/a
PLOTS                      0:17    10 plots        1.7 sec/plot
2USER          n/a
4USER          n/a
.fi


Notes:
.ls [1]
IRAF installed on a 300 Mb Maxstor with Emulex controller; data files on a
70Mb RD53. Only those benchmarks which access the RD53 disk are included.
.le

.bp
.sh
VMS/IRAF V2.5, VMS V4.5, VAX 11/750+FPA RA81/Clustered, 7.25 Mb (vela)
.br
Friday, 15 May 1987, Suzanne H. Jacoby, NOAO/Tucson

.nf
\fBBenchmark     CPU         CLK     Size            Notes\fR
               (user+sys)  (m:ss)

CLSS           14.11       0:27
MKPKGV         189.67      4:17
MKPKGC         18.08       3:44
MKHDB          46.54       1:11
IMADDS         5.90        0:11    512X512X16
IMADDR         6.48        0:14    512X512X32
IMSTATR        10.65       0:14    512X512X32
IMSHIFTR       69.62       1:33    512X512X32
IMLOAD         15.83       0:23    512X512X16
IMLOADF        6.08        0:13    512X512X16
IMTRAN         14.85       0:20    512X512X16
SUBPR          n/a         1:54    10 conn/discon  11.4 sec/proc
IPCO           1.16        0:06    100 getpars
IPCB           2.92        0:09    1E6 bytes       111.1 Kb/sec
FORTSK         n/a         0:33    10 commands     3.3 sec/cmd
WBIN           6.96        0:21    5E6 bytes       238.1 Kb/sec
RBIN           5.37        0:13    5E6 bytes       384.6 Kb/sec
RRBIN          1.86        0:10    5E6 bytes       500.0 Kb/sec
WTEXT          66.12       1:24    1E6 bytes       11.9 Kb/sec
RTEXT          32.06       0:36    1E6 bytes       27.7 Kb/sec
NWBIN          13.53       1:49    5E6 bytes       45.9 Kb/sec   [1]
NRBIN          19.52       2:06    5E6 bytes       39.7 Kb/sec   [1]
NWNULL         13.40       1:44    5E6 bytes       48.1 Kb/sec   [1]
NWTEXT         82.35       1:42    1E6 bytes       9.8 Kb/sec    [1]
NRTEXT         63.00       2:39    1E6 bytes       6.3 Kb/sec    [1]
PLOTS          n/a         0:25    10 plots        2.5 sec/PROW
2USER          n/a         0:53    10 plots        5.3 sec/PROW
4USER          n/a         1:59    10 plots        11.9 sec/PROW
.fi


Notes:
.ls [1]
The remote node for network benchmarks was aquila, a VAX 11/750 running
4.3BSD UNIX. Connection made via TCP/IP.
.le
.ls [2]
The interactive response of this system seemed to decrease markedly when it
was converted to 4.X VMS and is currently pretty marginal, even on a single
user 11/750. In interactive applications which make frequent system calls the
system tends to spend much of the available cpu time in kernel mode even if
there are only a few active users.
.le
.ls [3]
Compare the 2USER and 4USER timings with those for the UNIX 11/750. This
benchmark is characteristic of the two systems. No page faulting was evident
on the VMS 11/750 during the multiuser benchmarks. It took much longer to
run the 4USER benchmark on the VMS 750, as the set up time was much longer
once one or two other PLOTS jobs were running. The UNIX machine, on the other
hand, seemed almost as fast (or as slow) as usual, even with the PLOTS jobs
running on the other terminals.
.le
.ls [4]
The high value of the IPC bandwidth for VMS is due to the use of shared
memory. Mailboxes were considerably slower and are no longer used.
.le
.ls [5]
The foreign task interface uses mailboxes to talk to a DCL run as a subprocess
and should be considerably faster than it is. It is slow at present due to
the need to call SET MESSAGE before and after the user command to disable
pointless DCL error messages having to do with logical names.
+.le + +.bp +.sh +AOSVS/IRAF V2.5, AOSVS 7.54, Data General MV 10000 (solpl) +.br +24Mb, 2-600 Mb ARGUS disks and 2-600 Mb KISMET disks +.br +17 April 1987, Skip Schaller, Steward Observatory, University of Arizona + +.nf +\fBBenchmark CPU CLK Size Notes\fR + (sec) (m:ss) +CLSS 2.1 0:14 [1] +MKPKGV 9.6 0:29 +MKPKGC n/a 3:43 +MKHDB 6.4 0:25 +IMADDS 1.5 0:06 512x512x16 +IMADDR 1.6 0:08 512x512x32 +IMSTATR 4.8 0:07 512x512x32 +IMSHIFTR 39.3 0:47 512x512x32 +IMLOAD 3.1 0:08 512x512x16 [2] +IMLOADF 0.8 0:06 512x512x16 [2] +IMTRAN 2.9 0:06 512x512x16 +SUBPR n/a 0:36 10 conn/discon 3.6 sec/proc +IPCO 0.4 0:03 100 getpars +IPCB 0.9 0:07 1E6 bytes 142.9 Kb/sec +FORTSK n/a 0:17 10 commands 1.7 sec/cmd +WBIN 1.7 0:56 5E6 bytes 89.3 Kb/sec [3] +RBIN 1.7 0:25 5E6 bytes 200.0 Kb/sec [3] +RRBIN 0.5 0:27 5E6 bytes 185.2 Kb/sec [3] +WTEXT 12.7 0:25 1E6 bytes 40.0 Kb/sec [3] +RTEXT 8.4 0:13 1E6 bytes 76.9 Kb/sec [3] +CSTC 0.0 0:00 5E6 bytes [4] +WSTC 1.9 0:11 5E6 bytes 454.5 Kb/sec +RSTC 1.5 0:11 5E6 bytes 454.5 Kb/sec +RRSTC 0.1 0:10 5E6 bytes 500.0 Kb/sec +NWBIN 2.0 1:17 5E6 bytes 64.9 Kb/sec [5] +NRBIN 2.1 2:34 5E6 bytes 32.5 Kb/sec +NWNULL 2.0 1:15 5E6 bytes 66.7 Kb/sec +NWTEXT 15.1 0:41 1E6 bytes 24.4 Kb/sec +NRTEXT 8.7 0:55 1E6 bytes 18.2 Kb/sec +PLOTS n/a 0:09 10 plots 0.9 sec/PROW +2USER n/a 0:12 +4USER n/a 0:20 +.fi + + +Notes: +.ls [1] +The CLSS given is for a single user on the system. With one user already +logged into IRAF, the CLSS was 0:10. +.le +.ls [2] +These benchmarks were measured on the CTI system, an almost identically +configured MV/10000, with an IIS Model 75. +.le +.ls [3] +I/O throughput depends heavily on the element size of an AOSVS file. For +small element sizes, the throughput is roughly proportional to the element +size. I/O throughput in general could improve when IRAF file i/o starts +using double buffering and starts taking advantage of the asynchronous +definition of the kernel i/o drivers. +.le +.ls [4] +These static file benchmarks are not yet official IRAF benchmarks, but are +analogous to the binary file benchmarks. Since they use the supposedly +more efficient static file driver, they should give a better representation +of the true I/O throughput of the system. Since these are the drivers used +for image I/O, they represent the I/O throughput for the bulk image files. +.le +.ls [5] +The remote node used for the network tests was taurus, a SUN 3-160 +running SUN/UNIX 3.2. The network protocol used was TCP/IP. 
+.le + +.bp +.sh +AOSVS/IRAF V2.5, Data General MV 8000 (CTIO La Serena system) +.br +5Mb memory (?), 2 large DG disks plus 2 small Winchesters [1] +.br +17 April 1987, Doug Tody, NOAO/Tucson + +.nf +\fBBenchmark CPU CLK Size Notes\fR + (sec) (m:ss) +CLSS n/a 0:28 [2] +MKPKGV n/a 2:17 +MKPKGC n/a 6:38 +MKHDB 13.1 0:57 +IMADDS 2.9 0:12 512x512x16 +IMADDR 3.1 0:17 512x512x32 +IMSTATR 9.9 0:13 512x512x32 +IMSHIFTR 77.7 1:31 512x512x32 +IMLOAD n/a +IMLOADF n/a +IMTRAN 5.69 0:12 512x512x16 +SUBPR n/a 1:01 10 conn/discon 6.1 sec/proc +IPCO 0.6 0:04 100 getpars +IPCB 2.1 0:13 1E6 bytes 76.9 Kb/sec +FORTSK n/a 0:31 10 commands 3.1 sec/cmd +WBIN 5.0 2:41 5E6 bytes 31.1 Kb/sec +RBIN 2.4 0:25 5E6 bytes 200.0 Kb/sec +RRBIN 0.8 0:28 5E6 bytes 178.6 Kb/sec +WTEXT 24.75 0:57 1E6 bytes 17.5 Kb/sec +RTEXT 23.92 0:30 1E6 bytes 33.3 Kb/sec +NWBIN n/a +NRBIN n/a +NWNULL n/a +NWTEXT n/a +NRTEXT n/a +PLOTS n/a 0:16 10 plots 1.6 sec/PROW +2USER n/a 0:24 10 plots 2.4 sec/PROW +4USER +.fi + + +Notes: +.ls [1] +These benchmarks were run with the disks very nearly full and badly +fragmented, hence the i/o performance of the system was much worse than it +might otherwise be. +.le +.ls [2] +The CLSS given is for a single user on the system. With one user already +logged into IRAF, the CLSS was 0:18. +.le + +.bp + . +.sp 20 +.ce +APPENDIX 2. IRAF VERSION 2.2 BENCHMARKS +.ce +March 1986 + +.bp +.sh +UNIX/IRAF V2.2 4.2BSD UNIX, VAX 11/750+FPA RA81 (lyra) +.br +CPU times are given in seconds, CLK times in minutes and seconds. +.br +Saturday, 22 March, D. Tody, NOAO/Tucson + +.nf +\fBBenchmark CPU CLK Size Notes \fR + (user+sys) (m:ss) + +CLSS 06.8+04.0 0:13 +MKPKGV 24.5+26.0 1:11 +MKPKGC 160.5+67.4 4:33 +MKHDB 25.1+? 0:41 +IMADDS 3.3+? 0:08 512x512x16 +IMADDR 4.4 0:15 512x512x32 +IMSTATR 23.6 0:29 512x512x32 +IMSHIFTR 116.3 2:14 512x512x32 +IMLOAD 9.6 0:15 512x512x16 +IMLOADF 3.9 0:08 512x512x16 +IMTRAN 9.8 0:16 512x512x16 +SUBPR - 0:28 10 conn/discon 2.8 sec/proc +IPCO 1.3 0:08 100 getpars +IPCB 2.5 0:16 1E6 bytes 62.5 Kb/sec +FORTSK 4.4 0:22 10 commands 2.2 sec/cmd +WBIN 4.8 0:23 5E6 bytes 217.4 Kb/sec +RBIN 4.4 0:22 5E6 bytes 227.3 Kb/sec +RRBIN 0.2 0:20 5E6 bytes 250.0 Kb/sec +WTEXT 37.2 0:43 1E6 bytes 23.2 Kb/sec +RTEXT 32.2 0:37 1E6 bytes 27.2 Kb/sec +NWBIN 5.1 2:01 5E6 bytes 41.3 Kb/sec +NRBIN 8.3 2:13 5E6 bytes 37.6 Kb/sec +NWNULL 5.1 1:55 5E6 bytes 43.5 Kb/sec +NWTEXT 40.5 1:15 1E6 bytes 13.3 Kb/sec +NRTEXT 24.8 2:15 1E6 bytes 7.4 Kb/sec +PLOTS - 0:25 10 plots 2.5 clk/PROW +2USER - 0:43 +4USER - 1:24 +.fi + + +Notes: +.ls [1] +All cpu timings from MKHDB on do not include the "system" time. +.le +.ls [2] +4.3BSD UNIX, due out shortly, reportedly differs from 4.2 mostly in that +a number of efficiency improvements have been made. These benchmarks will +be rerun as soon as 4.3BSD becomes available. +.le +.ls [3] +In UNIX/IRAF V2.2, IPC communications are implemented with pipes which +are really sockets (a much more sophisticated mechanism than we need), +which accounts for the relatively low IPC bandwidth. +.le +.ls [4] +The remote node used for the network tests was aquila, a VAX 11/750 running +4.2 BSD UNIX. The network protocol used was TCP/IP. +.le +.ls [5] +The i/o bandwidth to disk should be improved dramatically when we implement +the planned "static file driver" for UNIX. This will provide direct, +asynchronous i/o for large preallocated binary files which do not change +in size after creation. 
The use of the global buffer cache by the UNIX +read and write system services is the one major shortcoming of the UNIX +system for image processing applications. +.le + +.bp +.sh +VMS/IRAF V2.2, VMS V4.3, VAX 11/750+FPA RA81/Clustered (vela) +.br +Wednesday, 26 March, D. Tody, NOAO/Tucson + +.nf +\fBBenchmark CPU CLK Size Notes \fR + (user+sys) (m:ss) + +CLSS 14.4 0:40 +MKPKGV 260.0 6:05 +MKPKGC - 4:51 +MKHDB 40.9 1:05 +IMADDS 6.4 0:10 512x512x16 +IMADDR 6.5 0:13 512x512x32 +IMSTATR 15.8 0:18 512x512x32 +IMSHIFTR 68.2 1:17 512x512x32 +IMLOAD 10.6 0:15 512x512x16 +IMLOADF 4.1 0:07 512x512x16 +IMTRAN 14.4 0:20 512x512x16 +SUBPR - 1:03 10 conn/discon 6 sec/subpr +IPCO 1.4 0:06 100 getpars +IPCB 2.8 0:07 1E6 bytes 143 Kb/sec +FORTSK - 0:35 10 commands 3.5 sec/cmd +WBIN (ra81)Cl 6.7 0:20 5E6 bytes 250 Kb/sec +RBIN (ra81)Cl 5.1 0:12 5E6 bytes 417 Kb/sec +RRBIN (ra81)Cl 1.8 0:10 5E6 bytes 500 Kb/sec +WBIN (rm80) 6.8 0:17 5E6 bytes 294 Kb/sec +RBIN (rm80) 5.1 0:13 5E6 bytes 385 Kb/sec +RRBIN (rm80) 1.8 0:09 5E6 bytes 556 Kb/sec +WTEXT 65.6 1:19 1E6 bytes 13 Kb/sec +RTEXT 32.5 0:34 1E6 bytes 29 Kb/sec +NWBIN (not available) +NRBIN (not available) +NWNULL (not available) +NWTEXT (not available) +NRTEXT (not available) +PLOTS - 0:24 10 plots +2USER - 0:43 +4USER - 2:13 response was somewhat erratic +.fi + + +Notes: + +.ls [1] +The interactive response of this system seemed to decrease markedly either +when it was converted to 4.x VMS or when it was clustered with our 8600. +In interactive applications which involve a lot of process spawns and other +system calls, the system tends to spend about half of the available cpu time +in kernel mode even if there are only a few active users. These problems +are much less noticeable on an 8600 or even on a 780, hence one wonders if +VMS has perhaps become too large and complicated for the relatively slow 11/750, +at least when used in a VAX-cluster configuration. +.le +.ls [2] +Compare the 2USER and 4USER timings with those for the UNIX 11/750. This +benchmark is characteristic of the two systems. No page faulting was evident +on the VMS 11/750 during the multiuser benchmarks. It took much longer to +run the 4USER benchmark on the VMS 750, as the set up time was much longer +once one or two other PLOTS jobs were running. The UNIX machine, on the other +hand, seemed almost as fast (or as slow) as usual, even with the PLOTS jobs +running on the other terminals. +.le +.ls [3] +The RA81 was clustered with the 8600, whereas the RM80 was directly connected +to the 11/750. +.le +.ls [4] +The high value of the IPC bandwidth for VMS is due to the use of shared +memory. Mailboxes were considerably slower and are no longer used. +.le +.ls [5] +The foreign task interface uses mailboxes to talk to a DCL run as a subprocess +and should be considerably faster than it is. It is slow at present due to +the need to call SET MESSAGE before and after the user command to disable +pointless DCL error messages having to do with logical names. +.le + +.bp +.sh +VMS/IRAF V2.2, VMS V4.3, VAX 8600 RA81/Clustered (draco) +.br +Saturday, 22 March, D. 
Tody, NOAO/Tucson + +.nf +\fBBenchmark CPU CLK Size Notes \fR + (user+sys) (m:ss) + +CLSS 2.4 0:08 +MKPKGV 48.0 1:55 +MKPKGC - 1:30 +MKHDB 7.1 0:21 +IMADDS 1.2 0:04 512x512x16 +IMADDR 1.5 0:08 512x512x32 +IMSTATR 3.0 0:05 512x512x32 +IMSHIFTR 13.6 0:20 512x512x32 +IMLOAD 2.8 0:07 512x512x16 via TCP/IP to lyra +IMLOADF 1.3 0:07 512x512x16 via TCP/IP to lyra +IMTRAN 3.2 0:07 512x512x16 +SUBPR - 0:26 10 conn/discon 2.6 sec/proc +IPCO 0.0 0:02 100 getpars +IPCB 0.3 0:07 1E6 bytes 142.9 Kb/sec +FORTSK - 0:13 10 commands 1.3 sec/cmd +WBIN (RA81)Cl 1.3 0:13 5E6 bytes 384.6 Kb/sec +RBIN (RA81)Cl 1.1 0:08 5E6 bytes 625.0 Kb/sec +RRBIN (RA81)Cl 0.3 0:07 5E6 bytes 714.0 Kb/sec +WTEXT 10.7 0:20 1E6 bytes 50.0 Kb/sec +RTEXT 5.2 0:05 1E6 bytes 200.0 Kb/sec +NWBIN 1.8 1:36 5E6 bytes 52.1 Kb/sec +NRBIN 8.0 2:06 5E6 bytes 39.7 Kb/sec +NWNULL 2.5 1:20 5E6 bytes 62.5 Kb/sec +NWTEXT 6.5 0:43 1E6 bytes 23.3 Kb/sec +NRTEXT 5.9 1:39 1E6 bytes 10.1 Kb/sec +PLOTS - 0:06 10 plots 0.6 sec/PROW +2USER - 0:08 +4USER - 0:14 +.fi + + +Notes: + +.ls [1] +Installed images were not used for these benchmarks; the CLSS timing +should be slightly improved if the CL image is installed. +.le +.ls [2] +The image display was accessed via the network (IRAF TCP/IP network interface, +Wollongong TCP/IP package for VMS), with the IIS image display residing on +node lyra and accessed via a UNIX/IRAF kernel server. The binary and text +file network tests also used lyra as the remote node. +.le +.ls [3] +The high value of the IPC bandwidth for VMS is due to the use of shared +memory. Mailboxes were considerably slower and are no longer used. +.le +.ls [4] +The foreign task interface uses mailboxes to talk to a DCL run as a +subprocess and should be considerably faster than it is. It is slow at +present due to the need to call SET MESSAGE before and after the user +command to disable pointless DCL error messages having to do with +logical names. +.le +.ls [5] +The cpu on the 8600 is so fast, compared to the fairly standard VAX i/o +channels, that most tasks are i/o bound. The system can therefore easily +support several heavy users before much degradation in performance is seen +(provided they access data stored on different disks to avoid a disk seek +bottleneck). This is borne out in the 2USER and 4USER benchmarks shown above. +The cpu did not become saturated until the fourth user was added in this +particular benchmark. +.le diff --git a/pkg/bench/bench.ms b/pkg/bench/bench.ms new file mode 100644 index 00000000..1dc6ebf7 --- /dev/null +++ b/pkg/bench/bench.ms @@ -0,0 +1,788 @@ +.RP +.TL +A Set of Benchmarks for Measuring IRAF System Performance +.AU +Doug Tody +.AI +.K2 "" "" "*" +March 1986 +.br +(Revised July 1987) + +.AB +.ti 0.75i +This paper presents a set of benchmarks for measuring the performance of +IRAF as installed on a particular host system. The benchmarks serve two +purposes: [1] they provide an objective means of comparing the performance of +different IRAF host systems, and [2] the benchmarks may be repeated as part of +the IRAF installation procedure to verify that the expected performance is +actually being achieved. While the benchmarks chosen are sometimes complex, +i.e., at the level of actual applications programs and therefore difficult to +interpret in detail, some effort has been made to measure all the important +performance characteristics of the host system. 
These include the raw cpu +speed, the floating point processing speed, the i/o bandwidth to disk, and a +number of characteristics of the host operating system as well, e.g., the +efficiency of common system calls, the interactive response of the system, +and the response of the system to loading. The benchmarks are discussed in +detail along with instructions for benchmarking a new system, followed by +tabulated results of the benchmarks for a number of IRAF host machines. +.AE + +.pn 1 +.bp +.ce +\fBContents\fR +.sp 3 +.sp +1.\h'|0.4i'\fBIntroduction\fP\l'|5.6i.'\0\01 +.sp +2.\h'|0.4i'\fBWhat is Measured\fP\l'|5.6i.'\0\02 +.sp +3.\h'|0.4i'\fBThe Benchmarks\fP\l'|5.6i.'\0\03 +.br +\h'|0.4i'3.1.\h'|0.9i'Host Level Benchmarks\l'|5.6i.'\0\03 +.br +\h'|0.9i'3.1.1.\h'|1.5i'CL Startup/Shutdown [CLSS]\l'|5.6i.'\0\03 +.br +\h'|0.9i'3.1.2.\h'|1.5i'Mkpkg (verify) [MKPKGV]\l'|5.6i.'\0\04 +.br +\h'|0.9i'3.1.3.\h'|1.5i'Mkpkg (compile) [MKPKGC]\l'|5.6i.'\0\04 +.br +\h'|0.4i'3.2.\h'|0.9i'IRAF Applications Benchmarks\l'|5.6i.'\0\04 +.br +\h'|0.9i'3.2.1.\h'|1.5i'Mkhelpdb [MKHDB]\l'|5.6i.'\0\05 +.br +\h'|0.9i'3.2.2.\h'|1.5i'Sequential Image Operators [IMADD, IMSTAT, etc.]\l'|5.6i.'\0\05 +.br +\h'|0.9i'3.2.3.\h'|1.5i'Image Load [IMLOAD,IMLOADF]\l'|5.6i.'\0\05 +.br +\h'|0.9i'3.2.4.\h'|1.5i'Image Transpose [IMTRAN]\l'|5.6i.'\0\06 +.br +\h'|0.4i'3.3.\h'|0.9i'Specialized Benchmarks\l'|5.6i.'\0\06 +.br +\h'|0.9i'3.3.1.\h'|1.5i'Subprocess Connect/Disconnect [SUBPR]\l'|5.6i.'\0\07 +.br +\h'|0.9i'3.3.2.\h'|1.5i'IPC Overhead [IPCO]\l'|5.6i.'\0\07 +.br +\h'|0.9i'3.3.3.\h'|1.5i'IPC Bandwidth [IPCB]\l'|5.6i.'\0\07 +.br +\h'|0.9i'3.3.4.\h'|1.5i'Foreign Task Execution [FORTSK]\l'|5.6i.'\0\07 +.br +\h'|0.9i'3.3.5.\h'|1.5i'Binary File I/O [WBIN,RBIN,RRBIN]\l'|5.6i.'\0\07 +.br +\h'|0.9i'3.3.6.\h'|1.5i'Text File I/O [WTEXT,RTEXT]\l'|5.6i.'\0\08 +.br +\h'|0.9i'3.3.7.\h'|1.5i'Network I/O [NWBIN,NRBIN,etc.]\l'|5.6i.'\0\08 +.br +\h'|0.9i'3.3.8.\h'|1.5i'Task, IMIO, GIO Overhead [PLOTS]\l'|5.6i.'\0\09 +.br +\h'|0.9i'3.3.9.\h'|1.5i'System Loading [2USER,4USER]\l'|5.6i.'\0\09 +.sp +4.\h'|0.4i'\fBInterpreting the Benchmark Results\fP\l'|5.6i.'\0\010 +.sp +\fBAppendix A: IRAF Version 2.5 Benchmarks\fP +.sp +\fBAppendix B: IRAF Version 2.2 Benchmarks\fP + +.nr PN 0 +.bp +.NH +Introduction +.PP +This set of benchmarks has been prepared with a number of purposes in mind. +Firstly, the benchmarks may be run after installing IRAF on a new system to +verify that the performance expected for that machine is actually being +achieved. In general, this cannot be taken for granted since the performance +actually achieved on a particular system may depend upon how the system +is configured and tuned. Secondly, the benchmarks may be run to compare +the performance of different IRAF hosts, or to track the system performance +over a period of time as improvements are made, both to IRAF and to the host +system. Lastly, the benchmarks provide a metric which can be used to tune +the host system. +.PP +All too often, the only benchmarks run on a system are those which test the +execution time of optimized code generated by the host Fortran compiler. +This is primarily a hardware benchmark and secondarily a test of the Fortran +optimizer. An example of this type of test is the famous Linpack benchmark. +.PP +The numerical execution speed test is an important benchmark but it tests only +one of the many factors contributing to the overall performance of the system +as perceived by the user. 
+In interactive use other factors are often more
+important, e.g., the time required to spawn or communicate with a subprocess,
+the time required to access a file, the response of the system as the number
+of users (or processes) increases, and so on.  While the quality of optimized
+code is significant for cpu intensive batch processing, other factors are
+often more important for sophisticated interactive applications.
+.PP
+The benchmarks described here are designed to test, as fully as possible,
+the major factors contributing to the overall performance of the IRAF system
+on a particular host.  A major factor in the timings of each benchmark is
+of course the IRAF system itself, but comparisons of different hosts are
+nonetheless possible since the code is virtually identical on all hosts
+(the applications and VOS are in fact identical on all hosts).
+The IRAF kernel (OS interface) is coded differently for each host operating
+system, but the functions performed by the kernel are identical on each host,
+and since the kernel is a very "thin" layer the kernel code itself is almost
+always a negligible factor in the final timings.
+.PP
+The IRAF version number, host operating system and associated version number,
+and the host computer hardware configuration are all important in interpreting
+the results of the benchmarks, and should always be recorded.
+
+.NH
+What is Measured
+.PP
+Each benchmark measures two quantities, the total cpu time required to
+execute the benchmark, and the total (wall) clock time required to execute the
+benchmark.  If the clock time measurement is to be of any value the benchmarks
+must be run on a single user system.  Given this "best time" measurement
+and some idea of how the system responds to loading, it is not difficult to
+estimate the performance to be expected on a loaded system.
+.PP
+The total cpu time required to execute a benchmark consists of the "user" time
+plus the "system" time.  The "user" time is the cpu time spent executing
+the instructions comprising the user (IRAF) program, i.e., any instructions
+in procedures linked directly into the process being executed.  The "system"
+time is the cpu time spent in kernel mode executing the system services called
+by the user program.  On some systems there is no distinction between the two
+types of timings, with the system time either being included in the measured
+cpu time, or omitted from the timings.  If the benchmark involves several
+concurrent processes, on some systems it may not be possible to measure the
+cpu time used by the subprocesses at all.
+.PP
+When possible we give both measurements, while in some cases only the user
+time is given, or only the sum of the user and system times.  The cpu time
+measurements are therefore only directly comparable between different
+operating systems for the simpler benchmarks, in particular those which make
+few system calls.  The cpu measurements given \fIare\fR accurate for the same
+operating system (e.g., some version of UNIX) running on different hosts,
+and may be used to compare such systems.  Reliable comparisons between
+different operating systems are also possible, but only if one thoroughly
+understands what is going on.
+.PP
+The clock time measurement includes both the user and system times, plus the
+time spent waiting for i/o.  Any minor system daemon processes executing while
+the benchmarks are being run may bias the clock time measurement slightly,
+but since these are a constant part of the host environment it is fair to
+include them in the timings.  If a major system daemon which runs only
+infrequently (e.g., the print symbiont in VMS) happens to run while a
+benchmark is in progress, the benchmark should be considered invalid.
+.PP
+Assuming an otherwise idle system, a comparison of the cpu and clock times
+tells whether the benchmark was cpu bound or i/o bound.
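+For example (the figures here are purely illustrative), a task which
+consumes 4.4 seconds of user time and 0.6 seconds of system time, but which
+requires 22 seconds of clock time to complete, spent roughly 17 seconds
+waiting for i/o and was clearly i/o bound; had the same task finished in
+5.5 seconds of clock time it would have been cpu bound.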
+Those benchmarks
+involving compiled IRAF tasks do not include the process startup and pagein
+times (these are measured by a different benchmark), hence the task should be
+run once before running the benchmark to connect the subprocess and page in
+the memory used by the task.  A good procedure to follow is to run each
+benchmark once to start the process, and then repeat the benchmark three
+times, averaging the results.  If inconsistent results are obtained, further
+iterations and/or monitoring of the host system are called for until a
+consistent result is achieved.
+.PP
+Many benchmarks depend upon disk performance as well as compute cycles.
+For such a benchmark to be a meaningful measure of the i/o bandwidth of the
+system it is essential that no other users (or batch jobs) be competing for
+disk seeks on the disk used for the test file.  There are subtle things to
+watch out for in this regard: for example, if the machine is in a VMS cluster
+or on a local area network, processes on other nodes may be accessing the
+local disk, yet will not show up in a user login or process list on the local
+node.  It is always desirable to repeat each test several times or on several
+different disk devices, to ensure that no outside requests were being serviced
+while the benchmark was being run.  If the system has disk monitoring
+utilities, use these to find an idle disk before running any benchmarks which
+do heavy i/o.
+.PP
+Beware of disks which are nearly full; the maximum achievable i/o bandwidth
+may fall off rapidly as a disk fills up, due to disk fragmentation (the file
+must be stored in little pieces scattered all over the physical disk).
+Similarly, many systems (VMS, AOS/VS, V7 and Sys V UNIX, but not Berkeley UNIX)
+suffer from disk fragmentation problems that gradually worsen as a files system
+ages, requiring that the disk periodically be backed off onto tape and then
+restored to render the files and free spaces as contiguous as possible.
+In some cases, disk fragmentation can cause the maximum achievable i/o
+bandwidth to degrade by an order of magnitude.  For example, on a VMS system
+one can use \fLCOPY/CONTIGUOUS\fR to render files contiguous (e.g., this can
+be done on all the executables in \fL[IRAF.BIN]\fR after installing the
+system, to speed process pagein times).  If the copy fails for a large file
+even though there is substantial free space left on the disk, the disk is
+badly fragmented.
+
+.NH
+The Benchmarks
+.PP
+Instructions are given for running each benchmark, and the operations
+performed by each benchmark are briefly described.  The system characteristics
+measured by the benchmark are briefly discussed.  A short mnemonic name is
+associated with each benchmark to identify it in the tables given in the
+appendices, tabulating the results for actual host machines.
+
+.NH 2
+Host Level Benchmarks
+.PP
+The benchmarks discussed in this section are run at the host system level.
+The examples are given for the UNIX cshell, under the assumption that a host
+dependent example is better than none at all.  These commands must be
+translated by the user to run the benchmarks on a different system
+(hint: use \fLSHOW STATUS\fR or a stop watch to measure wall clock times
+on a VMS host).
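+.PP
+As an illustration, the run-and-average procedure recommended in the
+previous section might be carried out for a host level benchmark under the
+UNIX cshell as follows (the MKPKGV benchmark of section 3.1.2 below is used
+here purely as an example):
+.DS
+\fL% cd $iraf/pkg
+% time mkpkg -n		# first run; discard this timing
+% time mkpkg -n		# repeat three times, averaging the
+% time mkpkg -n		# three timings to obtain the
+% time mkpkg -n		# final result\fR
+.DE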
+.NH 3
+CL Startup/Shutdown [CLSS]
+.PP
+Go to the CL login directory (any directory containing a \fLLOGIN.CL\fR file),
+mark the time (the method by which this is done is system dependent),
+and start up the CL.  Enter the "logout" command while the CL is starting up
+so that the CL will not be idle (with the clock running) while the command
+is being entered.  Mark the final cpu and clock time and compute the
+difference.
+.DS
+\fL% time cl
+logout\fR
+.DE
+.LP
+This is a complex benchmark but one which is of obvious importance to the
+IRAF user.  The benchmark is probably dominated by the cpu time required to
+start up the CL, i.e., start up the CL process, initialize the i/o system,
+initialize the environment, interpret the CL startup file, interpret the
+user LOGIN.CL file, connect and disconnect the x_system.e subprocess, and so on.
+Most of the remaining time is the overhead of the host operating system for
+the process spawns, page faults, file accesses, and so on.
+\fIDo not use a customized \fLLOGIN.CL\fP file when running this benchmark\fR,
+or the timings will almost certainly be affected.
+.NH 3
+Mkpkg (verify) [MKPKGV]
+.PP
+Go to the PKG directory and enter the (host system equivalent of the)
+following command.  The method by which the total cpu and clock times are
+computed is system dependent.
+.DS
+\fL% cd $iraf/pkg
+% time mkpkg -n\fR
+.DE
+.LP
+This benchmark does a "no execute" make-package of the entire PKG suite of
+applications and systems packages.  This tests primarily the speed with which
+the host system can read directories, resolve pathnames, and return directory
+information for files.  Since the PKG directory tree is continually growing,
+this benchmark is only useful for comparing the same version of IRAF run on
+different hosts, or the same version of IRAF on the same host at different
+times.
+.NH 3
+Mkpkg (compile) [MKPKGC]
+.PP
+Go to the directory "iraf$pkg/bench/xctest" and enter the (host system
+equivalents of the) following commands.  The method by which the total cpu
+and clock times are computed is system dependent.  Only the \fBmkpkg\fR
+command should be timed.
+.DS
+\fL
+% cd $iraf/pkg/bench/xctest
+% mkpkg clean		# delete old library, etc., if present
+% time mkpkg
+% mkpkg clean		# delete newly created binaries\fR
+.DE
+.LP
+This tests the time required to compile and link a small IRAF package.
+The timings reflect the time required to preprocess, compile, optimize,
+and assemble each module and insert it into the package library, then link
+the package executable.  The host operating system overhead for the process
+spawns, page faults, etc., is also a major factor.  If the host system
+provides a shared library facility this will significantly affect the link
+time, hence the benchmark should be run linking both with and without shared
+libraries to make a fair comparison to other systems.  Linking against a
+large library is fastest if the library is topologically sorted and stored
+contiguously on disk.
+
+.NH 2
+IRAF Applications Benchmarks
+.PP
+The benchmarks discussed in this section are run from within the IRAF
+environment, using only standard IRAF applications tasks.  The cpu and clock
+times of any (compiled) IRAF task may be measured by prefixing the task name
+with a $ when the command is entered into the CL, as shown in the examples.
+The significance of the cpu time measurement is not precisely defined for
+all systems.  On a UNIX host, it is the "user" cpu time used by the task.
+On a VMS host, there does not appear to be any distinction between the user
+and system times (probably because the system services execute in the context
+of the calling process), hence the cpu time given probably includes both,
+but probably excludes the time for any services executing in ancillary
+processes, e.g., for RMS.
+.NH 3
+Mkhelpdb [MKHDB]
+.PP
+The \fBmkhelpdb\fR task is in the \fBsoftools\fR package.  The function of
+the task is to scan the tree of ".hd" help-directory files and compile the
+binary help database.
+.DS
+\fLcl> softools
+cl> $mkhelpdb\fR
+.DE
+.LP
+This benchmark tests the speed of the host files system and the efficiency of
+the host system services and text file i/o, as well as the global optimization
+of the Fortran compiler and the MIPS rating of the host machine.
+Since the size of the help database varies with each version of IRAF,
+this benchmark is only useful for comparing the same version of IRAF run
+on different hosts, or the same version run on a single host at different
+times.  Note that any additions to the base IRAF system (e.g., SDAS) will
+increase the size of the help database and affect the timings.
+.NH 3
+Sequential Image Operators [IMADDS,IMADDR,IMSTATR,IMSHIFTR]
+.PP
+These benchmarks measure the time required by typical image operations.
+All tests should be performed on 512 square test images created with the
+\fBimdebug\fR package.  The \fBimages\fR and \fBimdebug\fR packages should
+be loaded.  Enter the following commands to create the test images.
+.DS
+\fLcl> mktest pix.s s 2 "512 512"
+cl> mktest pix.r r 2 "512 512"\fR
+.DE
+.LP
+The following benchmarks should be run on these test images.  Delete the
+output images after each benchmark is run.  Once a command has been entered
+as shown, it can be repeated by typing \fL^\fR followed by return.
+Each benchmark should be run several times, discarding the first timing and
+averaging the remaining timings for the final result.
+.DS
+.TS
+l l.
+[IMADDS]	\fLcl> $imarith pix.s + 5 pix2.s; imdel pix2.s\fR
+[IMADDR]	\fLcl> $imarith pix.r + 5 pix2.r; imdel pix2.r\fR
+[IMSTATR]	\fLcl> $imstat pix.r\fR
+[IMSHIFTR]	\fLcl> $imshift pix.r pix2.r .33 .44 interp=spline3\fR
+.TE
+.DE
+.LP
+The IMADD benchmarks test the efficiency of the image i/o system, including
+binary file i/o, and provide an indication of how long a simple disk to disk
+image operation takes on the system in question.  This benchmark should be
+i/o bound on most systems.  The IMSTATR and IMSHIFTR benchmarks are normally
+cpu bound, and test primarily the speed of the host cpu and floating point
+unit, and the quality of the code generated by the host Fortran compiler.
+Note that the IMSHIFTR benchmark employs a true two dimensional bicubic spline,
+hence the timings are a factor of 4 greater than one would expect if a one
+dimensional interpolator were used to shift the two dimensional image.
+.NH 3
+Image Load [IMLOAD,IMLOADF]
+.PP
+To run the image load benchmarks, first load the \fBtv\fR package and
+display something to get the x_display.e process into the process cache.
+Run the following two benchmarks, displaying the test image PIX.S (this image
+contains a test pattern of no interest).
+.DS
+.TS
+l l.
+[IMLOAD] \fLcl> $display pix.s 1\fR +[IMLOADF] \fLcl> $display pix.s 1 zt=none\fR +.TE +.DE +.LP +The IMLOAD benchmark measures how long it takes for a normal image load on +the host system, including the automatic determination of the greyscale +mapping, and the time required to map and clip the image pixels into the +8 bits (or whatever) displayable by the image display. This benchmark +measures primarily the cpu speed and i/o bandwidth of the host system. +The IMLOADF benchmark eliminates the cpu intensive greyscale transformation, +yielding the minimum image display time for the host system. +.NH 3 +Image Transpose [IMTRAN] +.PP +To run this benchmark, transpose the image PIX.S, placing the output in a +new image. +.DS +\fLcl> $imtran pix.s pix2.s\fR +.DE +.LP +This benchmark tests the ability of a process to grab a large amount of +physical memory (large working set), and the speed with which the host system +can service random rather than sequential file access requests. The user +working set should be large enough to avoid excessive page faulting. + +.NH 2 +Specialized Benchmarks +.PP +The next few benchmarks are implemented as tasks in the \fBbench\fR package, +located in the directory "pkg$bench". This package is not installed as a +predefined package as the standard IRAF packages are. Since this package is +used infrequently the binaries may have been deleted; if the file x_bench.e is +not present in the \fIbench\fR directory, rebuild it as follows: +.DS +\fLcl> cd pkg$bench +cl> mkpkg\fR +.DE +.LP +To load the package, enter the following commands. It is not necessary to +\fIcd\fR to the bench directory to load or run the package. +.DS +\fLcl> task $bench = "pkg$bench/bench.cl" +cl> bench +.DE +.LP +This defines the following benchmark tasks. There are no manual pages for +these tasks; the only documentation is what you are reading. +.DS +.TS +l l. +FORTASK - foreign task execution +GETPAR - get parameter; tests IPC overhead +PLOTS - make line plots from an image +RBIN - read binary file; tests FIO bandwidth +RRBIN - raw (unbuffered) binary file read +RTEXT - read text file; tests text file i/o speed +SUBPROC - subprocess connect/disconnect +WBIN - write binary file; tests FIO bandwidth +WIPC - write to IPC; tests IPC bandwidth +WTEXT - write text file; tests text file i/o speed +.TE +.DE +.NH 3 +Subprocess Connect/Disconnect [SUBPR] +.PP +To run the SUBPR benchmark, enter the following command. +This will connect and disconnect the x_images.e subprocess 10 times. +Difference the starting and final times printed as the task output to get +the results of the benchmark. The cpu time measurement may be meaningless +(very small) on some systems. +.DS +\fLcl> subproc 10\fR +.DE +This benchmark measures the time required to connect and disconnect an +IRAF subprocess. This includes not only the host time required to spawn +and later shutdown a process, but also the time required by the IRAF VOS +to set up the IPC channels, initialize the VOS i/o system, initialize the +environment in the subprocess, and so on. A portion of the subprocess must +be paged into memory to execute all this initialization code. The host system +overhead to spawn a subprocess and fault in a portion of its address space +is a major factor in this benchmark. +.NH 3 +IPC Overhead [IPCO] +.PP +The \fBgetpar\fR task is a compiled task in x_bench.e. The task will +fetch the value of a CL parameter 100 times. 
+.DS +\fLcl> $getpar 100\fR +.DE +Since each parameter access consists of a request sent to the CL by the +subprocess, followed by a response from the CL process, with a negligible +amount of data being transferred in each call, this tests the IPC overhead. +.NH 3 +IPC Bandwidth [IPCB] +.PP +To run this benchmark enter the following command. The \fBwipc\fR task +is a compiled task in x_bench.e. +.DS +\fLcl> $wipc 1E6 > dev$null\fR +.DE +This writes approximately 1 Mb of binary data via IPC to the CL, which discards +the data (writes it to the null file via FIO). Since no actual disk file i/o is +involved, this tests the efficiency of the IRAF pseudofile i/o system and of the +host system IPC facility. +.NH 3 +Foreign Task Execution [FORTSK] +.PP +To run this benchmark enter the following command. The \fBfortask\fR +task is a CL script task in the \fBbench\fR package. +.DS +\fLcl> fortask 10\fR +.DE +This benchmark executes the standard IRAF foreign task \fBrmbin\fR (one of the +bootstrap utilities) 10 times. The task is called with no arguments and does +nothing other than execute, print out its "usage" message, and shut down. +This tests the time required to execute a host system task from within the +IRAF environment. Only the clock time measurement is meaningful. +.NH 3 +Binary File I/O [WBIN,RBIN,RRBIN] +.PP +To run these benchmarks, make sure the \fBbench\fR package is loaded, and enter +the following commands. The \fBwbin\fR, \fBrbin\fR and \fBrrbin\fR tasks are +compiled tasks in x_bench.e. A binary file named BINFILE is created in the +current directory by WBIN, and should be deleted after the benchmark has been +run. Each benchmark should be run at least twice before recording the time +and moving on to the next benchmark. Successive calls to WBIN will +automatically delete the file and write a new one. +.PP +\fINOTE:\fR it is wise to create the test file on a files system which has +a lot of free space available, to avoid disk fragmentation problems. +Also, if the host system has two or more different types of disk drives +(or disk controllers or bus types), you may wish to run the benchmark +separately for each drive. +.DS +\fLcl> $wbin binfile 5E6 +cl> $rbin binfile +cl> $rrbin binfile +cl> delete binfile # (not part of the benchmark)\fR +.DE +.LP +These benchmarks measure the time required to write and then read a binary disk +file approximately 5 Mb in size. This benchmark measures the binary file i/o +bandwidth of the FIO interface (for sequential i/o). In WBIN and RBIN the +common buffered READ and WRITE requests are used, hence some memory to memory +copying is included in the overhead measured by the benchmark. A large FIO +buffer is used to minimize disk seeks and synchronization delays; somewhat +faster timings might be possible by increasing the size of the buffer +(this is not a user controllable option, and is not possible on all host +systems). The RRBIN benchmark uses ZARDBF to read the file in chunks of +32768 bytes, giving an estimate of the maximum i/o bandwidth for the system. +.NH 3 +Text File I/O [WTEXT,RTEXT] +.PP +To run these benchmarks, load the \fBbench\fR package, and then enter the +following commands. The \fBwtext\fR and \fBrtext\fR tasks are compiled tasks +in x_bench.e. A text file named TEXTFILE is created in the current directory +by WTEXT, and should be deleted after the benchmarks have been run. +Successive calls to WTEXT will automatically delete the file and write a new +one. 
+.DS
+\fLcl> $wtext textfile 1E6
+cl> $rtext textfile
+cl> delete textfile		# (not part of the benchmark)\fR
+.DE
+.LP
+These benchmarks measure the time required to write and then read a text disk
+file approximately one megabyte in size (15,625 64 character lines).
+This benchmark measures the efficiency with which the system can sequentially
+read and write text files.  Since text file i/o requires the system to pack
+and unpack records, text i/o tends to be cpu bound.
+.NH 3
+Network I/O [NWBIN,NRBIN,NWNULL,NWTEXT,NRTEXT]
+.PP
+These benchmarks are equivalent to the binary and text file benchmarks
+just discussed, except that the binary and text files are accessed on a
+remote node via the IRAF network interface.  The calling sequences are
+identical except that an IRAF network filename is given instead of referencing
+a file in the current directory.  For example, the following commands would
+be entered to run the network binary file benchmarks on node LYRA (the node
+name and filename are site dependent).
+.DS
+\fLcl> $wbin lyra!/tmp3/binfile 5E6	\fR[NWBIN]\fL
+cl> $rbin lyra!/tmp3/binfile		\fR[NRBIN]\fL
+cl> $wbin lyra!/dev/null 5E6		\fR[NWNULL]\fL
+cl> delete lyra!/tmp3/binfile\fR
+.DE
+.LP
+The text file benchmarks are equivalent with the obvious changes, i.e.,
+substitute "text" for "bin", "textfile" for "binfile", and omit the null
+textfile benchmark.  The type of network interface used (TCP/IP, DECNET, etc.)
+and the characteristics of the remote node should be recorded.
+.PP
+These benchmarks test the bandwidth of the IRAF network interfaces for binary
+and text files, as well as the limiting speed of the network itself (NWNULL).
+The binary file benchmarks should be i/o bound.  NWBIN should outperform
+NRBIN since a network write is a pipelined operation, whereas a network read
+is (currently) a synchronous operation.  Text file access may be either cpu
+or i/o bound depending upon the relative speeds of the network and host cpus.
+The IRAF network interface buffers textfile i/o to minimize the number of
+network packets and maximize the i/o bandwidth.
+.NH 3
+Task, IMIO, GIO Overhead [PLOTS]
+.PP
+The \fBplots\fR task is a CL script task which calls the \fBprow\fR task
+repeatedly to plot the same line of an image.  The graphics output is
+discarded (directed to the null file) rather than plotted since otherwise
+the results of the benchmark would be dominated by the plotting speed of the
+graphics terminal.
+.DS
+\fLcl> plots pix.s 10\fR
+.DE
+This is a complex benchmark.  The benchmark measures the overhead of task
+(not process) execution and the overhead of the IMIO and GIO subsystems,
+as well as the speed with which IPC can be used to pass parameters to a task
+and return the GIO graphics metacode to the CL.
+.PP
+The \fBprow\fR task is all overhead and is not normally used to interactively
+plot image lines (\fBimplot\fR is what is normally used), but it is a good
+task to use for a benchmark since it exercises the subsystems most commonly
+used in scientific tasks.  The \fBprow\fR task has a couple dozen parameters
+(mostly hidden), must open the image to read the image line to be plotted
+on every call, and must open the GIO graphics device on every call as well.
+.NH 3
+System Loading [2USER,4USER]
+.PP
+This benchmark attempts to measure the response of the system as the
+load increases.  This is done by running large \fBplots\fR jobs on several
+terminals and then repeating the 10 plot \fBplots\fR benchmark.
+For example, to run the 2USER benchmark, log in on a second terminal and
+enter the following command, and then repeat the PLOTS benchmark discussed
+in the last section.  Be sure to use a different login or login directory
+for each "user", to avoid concurrency problems, e.g., when reading the
+input image or updating parameter files.
+.DS
+\fLcl> plots pix.s 9999\fR
+.DE
+Theoretically, the benchmark should run approximately .5 (2USER) and
+.25 (4USER) times as fast as when the PLOTS benchmark was run on a single
+user system, assuming that cpu time is the limiting resource and that a
+single job is cpu bound.
+In a case where there is more than one limiting resource, e.g., disk seeks as
+well as cpu cycles, performance will fall off more rapidly.  If, on the other
+hand, a single user process does not keep the system busy, e.g., because
+synchronous i/o is used, performance will fall off less rapidly.  If the
+system unexpectedly runs out of some critical system resource, e.g., physical
+memory or some internal OS buffer space, performance may be much worse than
+expected.
+.PP
+If the multiuser performance is poorer than expected it may be possible to
+improve the system performance significantly once the reason for the poor
+performance is understood.  If disk seeks are the problem it may be possible
+to distribute the load more evenly over the available disks.  If the
+performance decays linearly as more users are added and then gets really bad,
+it is probably because some critical system resource has run out.  Use the
+system monitoring tools provided with the host operating system to try to
+identify the critical resource.  It may be possible to modify the system
+tuning parameters to fix the problem, once the critical resource has been
+identified.
+
+.NH
+Interpreting the Benchmark Results
+.PP
+Many factors determine the timings obtained when the benchmarks are run
+on a system.  These factors include all of the following:
+.sp
+.RS
+.IP \(bu
+The hardware configuration, e.g., cpu used, clock speed, availability of
+floating point hardware, type of floating point hardware, amount of memory,
+number and type of disks, degree of fragmentation of the disks, bus bandwidth,
+disk controller bandwidth, memory controller bandwidth for memory mapped DMA
+transfers, and so on.
+.IP \(bu
+The host operating system, including the version number, tuning parameters,
+user quotas, working set size, files system parameters, Fortran compiler
+characteristics, level of optimization used to compile IRAF, and so on.
+.IP \(bu
+The version of IRAF being run.  On a VMS system, are the images "installed"
+to permit shared memory and reduce physical memory usage?  Were the programs
+compiled with the code optimizer, and if so, what compiler options were used?
+Are shared libraries used if available on the host system?
+.IP \(bu
+Other activity in the system when the benchmarks were run.  If there were no
+other users on the machine at the time, how about batch jobs?  If the machine
+is on a cluster or network, were other nodes accessing the same disks?
+How many other processes were running on the local node?  Ideally, the
+benchmarks should be run on an otherwise idle system, else the results may be
+meaningless or next to impossible to interpret.  Given some idea of how the
+host system responds to loading, it is possible to estimate how a timing
+will scale as the system is loaded, but the reverse operation is much more
+difficult.
+.RE
+.sp
+.PP
+Because so many factors contribute to the results of a benchmark, it can be
+difficult to draw firm conclusions from any benchmark, no matter how simple.
+The hardware and software in modern computer systems are so complicated that
+it is difficult even for an expert with a detailed knowledge and understanding
+of the full system to explain in detail where the time is going, even when
+running the simplest benchmark.  On some recent message based multiprocessor
+systems it is probably impossible to fully comprehend what is going on at any
+given time, even if one fully understands how the system works, because of the
+dynamic nature of such systems.
+.PP
+Despite these difficulties, the benchmarks do provide a coarse measure of the
+relative performance of different host systems, as well as some indication of
+the efficiency of the IRAF VOS.  The benchmarks are designed to measure the
+performance of the \fIhost system\fR (both hardware and software) in a number
+of important areas, all of which play a role in determining the suitability of
+a system for scientific data processing.  The benchmarks are \fInot\fR
+designed to measure the efficiency of the IRAF software itself (except parts
+of the VOS), e.g., there is no measure of the time taken by the CL to compile
+and execute a script, no measure of the speed of the median algorithm or of
+an image transpose, and so on.  These timings are also important, of course,
+but should be measured separately.  Also, measurements of the efficiency of
+individual applications programs are much less critical than the performance
+criteria dealt with here, since it is relatively easy to optimize an
+inefficient or poorly designed applications program, even a complex one like
+the CL, but there is generally little one can do about the host system.
+.PP
+The timings for the benchmarks for a number of host systems are given in the
+appendices which follow.  Sometimes there will be more than one set of
+benchmarks for a given host system, e.g., because the system provided two or
+more disks or floating point options with different levels of performance.
+The notes at the end of each set of benchmarks are intended to document any
+special features or problems of the host system which may have affected the
+results.  In general we did not bother to record things like system tuning
+parameters, working set, page faults, etc., unless these were considered an
+important factor in the benchmarks.  In particular, few IRAF programs page
+fault other than during process startup, hence this is rarely a significant
+factor when running these benchmarks (except possibly in IMTRAN).
+.PP
+Detailed results for each configuration of each host system are presented on
+separate pages in the Appendices.  A summary table showing the results of
+selected benchmarks for all host systems at once is also provided.
+The system characteristic or characteristics principally measured by each
+benchmark is noted in the table below.  This is only approximate, e.g., the
+MIPS rating is a significant factor in all but the most i/o bound benchmarks.
+.KS
+.TS
+center;
+ci ci ci ci ci
+l c c c c.
+benchmark responsiveness mips flops i/o
+
+CLSS \(bu
+MKPKGV \(bu
+MKHDB \(bu \(bu
+PLOTS \(bu \(bu
+IMADDS \(bu \(bu
+IMADDR \(bu \(bu
+IMSTATR \(bu
+IMSHIFTR \(bu
+IMTRAN \(bu
+WBIN \(bu
+RBIN \(bu
+.TE
+.KE
+.sp
+.PP
+By \fIresponsiveness\fR we refer to the interactive response of the system
+as perceived by the user.
+A system with a good interactive response will do
+all the little things very fast, e.g., directory listings, image header
+listings, plotting from an image, loading new packages, starting up a new
+process, and so on.  Machines which score high in this area will seem fast
+to the user, whereas machines which score poorly will \fIseem\fR slow,
+sometimes frustratingly slow, even though they may score high in the areas
+of floating point performance or i/o bandwidth.  The interactive response
+of a system obviously depends upon the MIPS rating of the system (see below),
+but an often more significant factor is the design and computational complexity
+of the host operating system itself, in particular the time taken by the host
+operating system to execute system calls.  Any system which spends a large
+fraction of its time in kernel mode will probably have poor interactive
+response.  The response of the system to loading is also very important,
+i.e., if the system has trouble with load balancing as the number of users
+(or processes) increases, response will become increasingly erratic until the
+interactive response is hopelessly poor.
+.PP
+The MIPS column refers to the raw speed of the system when executing arbitrary
+code containing a mixture of various types of instructions, but little floating
+point, i/o, or system calls.  A machine with a high MIPS rating will have a
+fast cpu, e.g., a fast clock rate, fast memory access time, large cache memory,
+and so on, as well as a good optimizing Fortran compiler.  Assuming good
+compilers, the MIPS rating is primarily a measure of the hardware speed of
+the host machine, but all of the MIPS related benchmarks presented here also
+make a significant number of system calls (MKHDB, for example, does a lot of
+file accesses and text file i/o), hence it is not that simple.  Perhaps a
+completely cpu bound pure-MIPS benchmark should be added to our suite of
+benchmarks (the MIPS rating of every machine is generally well known, however).
+.PP
+The FLOPS column identifies those benchmarks which do a significant amount of
+floating point computation.  The IMSHIFTR and IMSTATR benchmarks in particular
+are heavily into floating point.  These benchmarks measure the single
+precision floating point speed of the host system hardware, as well as the
+effectiveness of do-loop optimization by the host Fortran compiler.
+The degree of optimization provided by the Fortran compiler can affect the
+timing of these benchmarks by up to a factor of two.  Note that the sample is
+very small, and if a compiler fails to optimize the inner loop of one of these
+benchmark programs, the situation may be reversed when running some other
+benchmark.  Any reasonable Fortran compiler should be able to optimize the
+inner loop of the IMADDR benchmark, so the CPU timing for this benchmark is
+a good measure of the hardware floating point speed, if one allows for do-loop
+overhead, memory i/o, and the system calls necessary to access the image on
+disk.
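+For example, since IMADDR performs one single precision floating point
+addition per pixel of a 512x512 image, the 4.4 second cpu timing recorded
+for the 4.2BSD VAX 11/750+FPA in the Version 2.2 appendix corresponds to
+roughly 512*512/4.4, or 60 thousand floating point additions per second,
+with the do-loop, memory, and image access overhead included in that figure.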
+.PP
+The I/O column identifies those benchmarks which are i/o bound and which
+therefore provide some indication of the i/o bandwidth of the host system.
+The i/o bandwidth actually achieved in these benchmarks depends upon
+many factors, the most important of which are the host operating system
+software (files system data structures and i/o software, disk drivers, etc.)
+and the host system hardware, i.e., disk type, disk controller type, bus
+bandwidth, and DMA memory controller bandwidth.  Note that asynchronous i/o
+is not currently used in these benchmarks, hence higher transfer rates are
+probably possible in special cases (on a busy system all i/o is asynchronous
+at the host system level anyway).  Large transfers are used to minimize disk
+seeks and synchronization delays, hence the benchmarks should provide a good
+measure of the realistically achievable host i/o bandwidth.
diff --git a/pkg/bench/bench_tab.ms b/pkg/bench/bench_tab.ms
new file mode 100644
index 00000000..9245cbff
--- /dev/null
+++ b/pkg/bench/bench_tab.ms
@@ -0,0 +1,98 @@
+.LP
+.hm 0.25i
+.nr HM 0.25i
+.vs 10
+.nr VS 10
+.ll 9.0i
+.nr LL 9.0i
+.ps 9.0
+.nr PS 9.0
+.po 0.5i
+.nr PO 0.5i
+.bp
+.LP
+\fBIRAF V2.5 Table of Selected Benchmark Results		May 1987\fR
+.br
+CPU and/or clock times are tabulated below for selected benchmark tests.
+CPU times are given in seconds; clock times (in parentheses) are given
+as (m:ss).  For the WBIN and RBIN benchmarks, the tabulated result is
+the measured bandwidth in Kbytes/second.  For a description of the
+benchmark tests, see the document "A Set of Benchmarks for Measuring
+IRAF System Performance", Doug Tody, May 1987.
+.sp
+.TS
+cB cB cB cB s cB cB s cB s cB s cB s cB s cB cB
+cB cB cB cB s cB cB s cB s cB s cB s cB s cB cB
+lB |n| n| n n| n| n n| n n| n n| n n| n n| n| n|.
+ CLSS MKPKGV MKHDB PLOTS IMADDS IMADDR IMSTATR IMSHIFTR IMTRAN WBIN RBIN
+ _ _ _ _ _ _ _ _ _ _ _
+
+ISI (0\&:03) (0\&:25) 6\&.00 (0\&:17) (0\&:10) 0\&.89 (0\&:05) 3\&.82 (0\&:10) 7\&.77 (0\&:10) 81\&.60 (1\&:29) 1\&.62 (0\&:06) 294.1 277.8
+
+SUN3 (0\&:03) (0\&:17) 5\&.26 (0\&:10) (0\&:09) 0\&.62 (0\&:03) 3\&.34 (0\&:09) 8\&.38 (0\&:11) 83\&.44 (1\&:33) 1\&.47 (0\&:05) 625.0 454.5
+
+SUN3+ (0\&:04) (0\&:19) 5\&.28 (0\&:11) (0\&:06) 0\&.63 (0\&:03) 0\&.86 (0\&:06) 5\&.1 (0\&:08) 31\&.1 (0\&:36) 1\&.5 (0\&:04) 714.3 454.5
+
+U750 (0\&:17) (0\&:39) 22\&.79 (0\&:40) (0\&:29) 3\&.31 (0\&:10) 4\&.28 (0\&:17) 10\&.98 (0\&:15) 114\&.41 (2\&:13) 10\&.19 (0\&:17) 208.3 208.3
+
+V750 (0\&:27) (4\&:17) 46\&.54 (1\&:11) (0\&:25) 5\&.90 (0\&:11) 6\&.48 (0\&:14) 10\&.65 (0\&:14) 69\&.62 (1\&:33) 14\&.85 (0\&:20) 238.1 384.6
+
+UMVX (0\&:09) (0\&:37) 15\&.5 (0\&:38) (0\&:20) 2\&.06 (0\&:09) 2\&.98 (0\&:17) 10\&.98 (0\&:16) 95\&.61 (1\&:49) 4\&.93 (0\&:16) 172.4 208.3
+
+VMVX n/a n/a n/a n/a (0\&:17) 3\&.44 (0\&:11) 4\&.31 (0\&:15) 9\&.32 (0\&:12) 74\&.72 (1\&:26) 10\&.83 (0\&:35) 192.3 294.1
+
+VMVXM (0\&:17) (2\&:16) 27\&.58 (0\&:39) (0\&:16) 3\&.51 (0\&:07) 4\&.31 (0\&:10) 9\&.31 (0\&:11) 74\&.54 (1\&:21) 10\&.81 (0\&:27) 312.5 500.0
+
+V780 n/a n/a n/a n/a (0\&:16) 3\&.38 (0\&:08) 4\&.00 (0\&:11) 6\&.88 (0\&:08) 45\&.47 (0\&:53) 7\&.71 (0\&:12) 227.3 416.7
+
+V780S (0\&:15) (2\&:09) 26\&.10 (0\&:31) (0\&:19) 3\&.57 (0\&:10) 4\&.22 (0\&:17) 6\&.78 (0\&:10) 45\&.11 (0\&:57) 7\&.83 (0\&:14) 166.7 263.2
+
+V8600 (0\&:08) (1\&:05) 8\&.59 (0\&:17) (0\&:09) 1\&.56 (0\&:05) 1\&.28 (0\&:07) 2\&.09 (0\&:04) 13\&.54 (0\&:32) 2\&.58 (0\&:06) 294.1 625.0
+
+MV10 (0\&:14) (0\&:29) 6\&.4 (0\&:25) (0\&:09) 1\&.5 (0\&:06) 1\&.6 (0\&:08) 4\&.8 (0\&:07) 39\&.3 (0\&:47) 2\&.9 (0\&:06) 89.3 200.0
+
+MV8 (0\&:28) (2\&:17) 13.13 (0\&:57) (0\&:16) 2\&.85 (0\&:12) 3\&.07 (0\&:17) 9\&.87 (0\&:13) 77\&.68 (1\&:31) 5\&.69 (0\&:12) 31\&.1 200\&.0
+.TE
+.sp
+.LP
+\fBKEY:\fR
+.TS
+lB lw(8.0i).
+ISI T{ +Integrated Solutions with 16-Mhz 68020 and 16-Mhz 68881 fp_coprocessor; UNIX +4.2BSD; 8Mb memory; Greenhills compiler +T} +SUN3 T{ +SUN 3/160C with 68881 fp_chip; SUN UNIX 3.3; 8Mb memory; Eagle +disk with 380Mb +T} +SUN3+ T{ +SUN 3/180C with 68881 fp_chip + FPA; SUN UNIX 3.2; 8Mb memory; 380Mb Eagle disk +T} +U750 VAX 11/750+FPA; UNIX 4.3BSD; 8Mb memory; RA81 disk +V750 VAX 11/750+FPA; VMS V4.5; 7.25 Mb memory; RA81/clustered disks +UMVX VAXSTATION II/GPX; ULTRIX 1.2; 5Mb memory; 150 Mb RD54 disk +VMVXM T{ +VAXSTATION II/GPX; MICROVMS V4.5; 5Mb memory; IRAF installed on 300MB +MAXSTOR disk, data files on this disk also +T} +VMVX T{ +VAXSTATION II/GPX; MICROVMS V4.5; 5Mb memory; IRAF on 300MB +MAXSTOR disk, data on 70Mb RD53 (84% full) +T} +V780 T{ +VAX 11/780+FPA; VMS V4.5; 16Mb memory; IRAF installed on an RA81, data on an +RM03 disk with 23 free Mb, Massbus +T} +V780S T{ +VAX 11/780+FPA; VMS V4.5; 16Mb memory; IRAF and data on an RA81 disk, Unibus +T} +V8600 VAX 8600; VMS V4.5; 28Mb memory; RA81/clustered disks +MV10 T{ +MV 10000; AOSVS 7.54; 24Mb memory; 2-600 Mb ARGUS and 2-600 Mb KISMET disks +T} +MV8 T{ +MV 8000 at La Serena; 5Mb memory, 2 large DG disks, 2 small Winchesters, +disks nearly full and badly fragmented +T} +.TE diff --git a/pkg/bench/fortask.cl b/pkg/bench/fortask.cl new file mode 100644 index 00000000..586386e5 --- /dev/null +++ b/pkg/bench/fortask.cl @@ -0,0 +1,15 @@ +# FORTASK -- Execute a foreign task repeatedly. + +procedure fortask (nreps) + +int nreps { prompt = "number of repetitions" } +int i + +begin + time; print ("======= begin ========") + + for (i=nreps; i > 0; i-=1) + !rmbin + + print ("======= end ========"); time +end diff --git a/pkg/bench/mkpkg b/pkg/bench/mkpkg new file mode 100644 index 00000000..d0ada370 --- /dev/null +++ b/pkg/bench/mkpkg @@ -0,0 +1,5 @@ +# Make the bench package. + +$omake x_bench.x +$link x_bench.o +$exit diff --git a/pkg/bench/plots.cl b/pkg/bench/plots.cl new file mode 100644 index 00000000..dc92ae4b --- /dev/null +++ b/pkg/bench/plots.cl @@ -0,0 +1,20 @@ +# PLOTS -- Measure the time required to make a number of row plots of an image. + +procedure plots (image, nlines) + +string image { prompt = "image to be plotted" } +int nlines { prompt = "number of line plots to be made" } + +string imname +int nleft + +begin + cache ("prow") + imname = image + time(); print ("======== start ========") + + for (nleft=nlines; nleft > 0; nleft-=1) + $prow (imname, 50, >G "dev$null") + + print ("======== end ========"); time() +end diff --git a/pkg/bench/subproc.cl b/pkg/bench/subproc.cl new file mode 100644 index 00000000..d1371484 --- /dev/null +++ b/pkg/bench/subproc.cl @@ -0,0 +1,18 @@ +# SUBPROC -- Benchmark the process control facilities. + +procedure subproc (nreps) + +int nreps { prompt = "number of repetitions" } +int i + +begin + time; print ("======= begin ========") + + for (i=nreps; i > 0; i-=1) { + prcache ("imheader") + flprcache ("imheader") + time() + } + + print ("======= end ========"); time +end diff --git a/pkg/bench/x_bench.x b/pkg/bench/x_bench.x new file mode 100644 index 00000000..f6d6e3df --- /dev/null +++ b/pkg/bench/x_bench.x @@ -0,0 +1,229 @@ +# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc. + +include <time.h> +include <mach.h> +include <fset.h> +include <knet.h> + +# BENCH -- IRAF benchmark tasks. 
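+# All of the compiled benchmark tasks are linked into the single executable
+# x_bench.e (see the task statement below), hence a single process spawn
+# suffices to run any of the tasks once the process is in the process cache.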
+ +task ptime = t_ptime, + getpar = t_getpar, + wipc = t_wipc, + rbin = t_rbin, + wbin = t_wbin, + rrbin = t_rrbin, + rtext = t_rtext, + wtext = t_wtext + +define SZ_RBBUF 16384 +define SZ_BBUF 4096 +define SZ_TBUF 64 + + +# PTIME -- Print the current clock time. This is essentially a no-op task, +# used to test process connect/disconnect, IPC, and task startup/shutdown +# overhead. + +procedure t_ptime() + +char tbuf[SZ_TIME] +long clktime() + +begin + call cnvtime (clktime (long(0)), tbuf, SZ_TIME) + call printf ("%s\n") + call pargstr (tbuf) +end + + +# GETPAR -- Get a parameter from the CL repeatedly. Used to test the IPC +# turnaround time. + +procedure t_getpar() + +int niter, i +char paramval[SZ_FNAME] +int clgeti() + +begin + niter = clgeti ("niter") + do i = 1, niter + call clgstr ("cl.version", paramval, SZ_FNAME) +end + + +# WIPC -- Write to IPC (tests IPC bandwidth). + +procedure t_wipc() + +int fd, i +char bbuf[SZ_BBUF] +long n, filesize, clgetl() + +begin + fd = STDOUT + filesize = clgetl ("filesize") / SZB_CHAR + + do i = 1, SZ_BBUF + bbuf[i] = mod (i-1, 128) + 1 + + for (n=0; n < filesize; n = n + SZ_BBUF) + call write (fd, bbuf, SZ_BBUF) + + call eprintf ("wrote %d bytes\n") + call pargl (n * SZB_CHAR) +end + + +# RBIN -- Read from a binary file. + +procedure t_rbin() + +long totchars +char fname[SZ_FNAME] +char bbuf[SZ_BBUF] +int fd, open(), read() + +begin + call clgstr ("fname", fname, SZ_FNAME) + fd = open (fname, READ_ONLY, BINARY_FILE) + call fseti (fd, F_ADVICE, SEQUENTIAL) + totchars = 0 + + while (read (fd, bbuf, SZ_BBUF) == SZ_BBUF) + totchars = totchars + SZ_BBUF + + call close (fd) + call printf ("read %d bytes\n") + call pargl (totchars * SZB_CHAR) +end + + +# WBIN -- Write to a binary file. + +procedure t_wbin() + +char fname[SZ_FNAME] +char bbuf[SZ_BBUF] +int fd, i, open() +long n, filesize, clgetl() + +begin + call clgstr ("fname", fname, SZ_FNAME) + iferr (call delete (fname)) + ; + fd = open (fname, APPEND, BINARY_FILE) + call fseti (fd, F_ADVICE, SEQUENTIAL) + filesize = clgetl ("filesize") / SZB_CHAR + + do i = 1, SZ_BBUF + bbuf[i] = mod (i-1, 128) + 1 + + for (n=0; n < filesize; n = n + SZ_BBUF) + call write (fd, bbuf, SZ_BBUF) + + call close (fd) + call printf ("wrote %d bytes\n") + call pargl (n * SZB_CHAR) +end + + +# RTEXT -- Read from a text file. + +procedure t_rtext() + +long totchars +char fname[SZ_FNAME] +char tbuf[SZ_TBUF] +int fd, nchars, nlines +int open(), getline() + +begin + call clgstr ("fname", fname, SZ_FNAME) + fd = open (fname, READ_ONLY, TEXT_FILE) + totchars = 0 + nlines = 0 + + repeat { + nchars = getline (fd, tbuf) + if (nchars > 0) { + totchars = totchars + nchars + nlines = nlines + 1 + } + } until (nchars == EOF) + + call close (fd) + call printf ("read %d chars, %d lines\n") + call pargl (totchars) + call pargi (nlines) +end + + +# WTEXT -- Write to a text file. + +procedure t_wtext() + +char fname[SZ_FNAME] +char tbuf[SZ_TBUF] +int fd, op, open() +long n, nlines, filesize, clgetl() + +begin + call clgstr ("fname", fname, SZ_FNAME) + iferr (call delete (fname)) + ; + fd = open (fname, APPEND, TEXT_FILE) + filesize = clgetl ("filesize") + nlines = 0 + + for (op=1; op < SZ_TBUF; op=op+1) + tbuf[op] = '.' + + tbuf[op] = '\n' + op = op + 1 + tbuf[op] = EOS + + for (n=0; n < filesize; n = n + SZ_TBUF) { + call putline (fd, tbuf) + nlines = nlines + 1 + } + + call close (fd) + call printf ("wrote %d chars, %d lines\n") + call pargl (n) + call pargi (nlines) +end + + +# RRBIN -- Raw (unbuffered) read from a binary file. 
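+# Each pass through the main loop posts an asynchronous read with zardbf and
+# then immediately waits for it to complete with zawtbf, which returns the
+# actual transfer count in bytes (or ERR) in "status".  The point is to
+# bypass FIO buffering entirely and measure the raw bandwidth of the host
+# binary file driver.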
+
+procedure t_rrbin()
+
+char	fname[SZ_FNAME]
+char	bbuf[SZ_RBBUF]
+long	totchars, offset, buflen
+int	fd, chan, status
+int	open(), fstati()
+
+begin
+	call clgstr ("fname", fname, SZ_FNAME)
+	fd = open (fname, READ_ONLY, BINARY_FILE)
+	chan = fstati (fd, F_CHANNEL)
+
+	buflen = SZ_RBBUF * SZB_CHAR
+	totchars = 0
+	offset = 1
+	status = 0
+
+	repeat {
+	    totchars = totchars + (status / SZB_CHAR)
+	    call zardbf (chan, bbuf, buflen, offset)
+	    offset = offset + buflen
+	    call zawtbf (chan, status)
+	} until (status <= 0)
+
+	call close (fd)
+	call printf ("read %d bytes\n")
+	call pargl (totchars * SZB_CHAR)
+end
diff --git a/pkg/bench/xctest/README b/pkg/bench/xctest/README
new file mode 100644
index 00000000..724ec929
--- /dev/null
+++ b/pkg/bench/xctest/README
@@ -0,0 +1,2 @@
+This directory is an example of a small IRAF package, used to benchmark the
+time required to compile and link a small package.
diff --git a/pkg/bench/xctest/columns.x b/pkg/bench/xctest/columns.x
new file mode 100644
index 00000000..ee52abc5
--- /dev/null
+++ b/pkg/bench/xctest/columns.x
@@ -0,0 +1,74 @@
+# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
+
+include <ctype.h>
+include <chars.h>
+include <error.h>
+
+define	MAX_FILES	12
+
+.help columns
+.nf___________________________________________________________________
+COLUMNS -- convert a multicolumn file into a multifile column.
+	A set of files `sdastemp.n' is produced, with each column
+	in a separate file.
+
+usage:	COLUMNS number_of_columns File_name
+.endhelp______________________________________________________________
+
+
+# COLUMNS.X -- SDAS support utility
+#
+# This routine allows SDAS to treat multicolumn tables
+# as simple CL lists.  Each column in the table is referenced in
+# SDAS by a different parameter, pointing in the .par file to
+# a different list.  This routine is a preprocessor which takes
+# a multicolumn file and generates a multifile column.
+#
+# To allow for column headers in the multicolumn file,
+# any line which begins with a `#' will be ignored.
+# All data is transferred as text.
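+#
+# Note that all of the output files are open simultaneously, hence
+# "numcols" must not exceed MAX_FILES; no check is made here.
+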
+
+procedure t_columns()
+
+char fname[SZ_FNAME], outfile[SZ_FNAME], outroot[SZ_FNAME]
+char line[SZ_LINE], word[SZ_LINE], filenum[SZ_FNAME]
+int numcols, infile
+int outnum[MAX_FILES]
+int nchar, nfile, ip
+int clgeti(), open(), getline(), itoc(), ctowrd()
+errchk open, getline
+
+begin
+	# Get the number of columns and the input file name.
+	call clgstr ("filename", fname, SZ_FNAME)
+	numcols = clgeti ("numcols")
+	call clgstr ("outroot", outroot, SZ_FNAME)
+
+	# Open all the files.
+	infile = open (fname, READ_ONLY, TEXT_FILE)
+	for (nfile=1;  nfile <= numcols;  nfile=nfile+1) {
+	    nchar = itoc (nfile, filenum, 2)
+	    call strcpy (outroot, outfile, SZ_FNAME)
+	    call strcat (filenum, outfile, SZ_FNAME)
+	    outnum[nfile] = open (outfile, NEW_FILE, TEXT_FILE)
+	}
+
+	# Separate each line of the input file.
+	while (getline (infile, line) != EOF) {
+	    if ((line[1] != '#') && (line[1] != '\n')) {
+		ip = 1
+		for (nfile=1;  nfile <= numcols;  nfile=nfile+1) {
+		    nchar = ctowrd (line, ip, word, SZ_LINE)
+		    call strcat ("\n", word, SZ_LINE)
+		    call putline (outnum[nfile], word)
+		}
+	    }
+	}
+
+	# Close the files.
+	call close (infile)
+	for (nfile=1;  nfile <= numcols;  nfile=nfile+1) {
+	    call close (outnum[nfile])
+	}
+end
diff --git a/pkg/bench/xctest/lintran.x b/pkg/bench/xctest/lintran.x
new file mode 100644
index 00000000..fe0ffdbc
--- /dev/null
+++ b/pkg/bench/xctest/lintran.x
@@ -0,0 +1,370 @@
+# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
+
+include <pattern.h>
+include <ctype.h>
+
+define MAX_FIELDS 100 # Maximum number of fields in list
+define TABSIZE 8 # Spacing of tab stops
+define LEN_TR 9 # Length of structure TR
+
+# The TR transformation descriptor structure.
+
+define X1 Memr[P2R($1)] # Input origin
+define Y1 Memr[P2R($1+1)]
+define XSCALE Memr[P2R($1+2)] # Scale factors
+define YSCALE Memr[P2R($1+3)]
+define THETA Memr[P2R($1+4)] # Rotation angle
+define X2 Memr[P2R($1+5)] # Output origin
+define Y2 Memr[P2R($1+6)]
+define COS_THETA Memr[P2R($1+7)]
+define SIN_THETA Memr[P2R($1+8)]
+
+
+# LINTRAN -- Performs a linear transformation on each element of the
+# input list, producing a transformed list as output.
+
+procedure t_lintran()
+
+char in_fname[SZ_FNAME]
+int list
+pointer sp, tr
+int xfield, yfield, min_sigdigits
+
+int clgeti(), clpopni(), clgfil()
+
+begin
+	# Allocate memory for the transformation parameters structure.
+	call smark (sp)
+	call salloc (tr, LEN_TR, TY_STRUCT)
+
+	# Call procedure to get parameters and fill the structure.
+	call lt_initialize_transform (tr)
+
+	# Get the field numbers from the CL.
+	xfield = clgeti ("xfield")
+	yfield = clgeti ("yfield")
+	min_sigdigits = clgeti ("min_sigdigits")
+
+	# Open the template of input files.
+	list = clpopni ("files")
+
+	# While the input list is not depleted, open the file and transform
+	# the list.
+	while (clgfil (list, in_fname, SZ_FNAME) != EOF)
+	    call lt_transform_file (in_fname, xfield, yfield, min_sigdigits, tr)
+
+	# Close the template.
+	call clpcls (list)
+	call sfree (sp)
+end
+
+
+# LT_INITIALIZE_TRANSFORM -- Get the parameter values relevant to the
+# transformation from the CL.  List entries will be transformed in
+# procedure lt_transform: the input origin is subtracted, then the
+# coordinates are scaled, rotated, and shifted to the output origin.
+
+procedure lt_initialize_transform (tr)
+
+pointer tr
+
+bool clgetb()
+real clgetr()
+
+begin
+	# Get parameters from the CL.
+	X1(tr) = clgetr ("x1")			# (x1,y1) = current origin
+	Y1(tr) = clgetr ("y1")
+	XSCALE(tr) = clgetr ("xscale")
+	YSCALE(tr) = clgetr ("yscale")
+	THETA(tr) = clgetr ("angle")
+	if (! clgetb ("radians"))
+	    THETA(tr) = THETA(tr) / 57.29577951	# degrees per radian
+	X2(tr) = clgetr ("x2")			# (x2,y2) = new origin
+	Y2(tr) = clgetr ("y2")
+
+	# The following terms are constant for a given transformation.
+	# They are calculated once and saved in the structure.
+
+	COS_THETA(tr) = cos (THETA(tr))
+	SIN_THETA(tr) = sin (THETA(tr))
+end
+
+
+# LT_TRANSFORM_FILE -- This procedure is called once for each file
+# in the input list.  Each line of the input file that is not blank
+# or a comment is transformed; blank and comment lines are output
+# unaltered.
+
+procedure lt_transform_file (in_fname, xfield, yfield, min_sigdigits, tr)
+
+char in_fname[ARB]
+int xfield, yfield
+pointer tr
+
+char outbuf[SZ_LINE]
+int nfields, nchars, max_fields, in, nline
+int nsdig_x, nsdig_y, offset, min_sigdigits
+pointer sp, field_pos, linebuf, inbuf, ip
+double x, y, xt, yt
+int getline(), lt_get_num(), open()
+
+begin
+	call smark (sp)
+	call salloc (inbuf, SZ_LINE, TY_CHAR)
+	call salloc (linebuf, SZ_LINE, TY_CHAR)
+	call salloc (field_pos, MAX_FIELDS, TY_INT)
+
+	max_fields = MAX_FIELDS
+
+	# Open the input file.
+	in = open (in_fname, READ_ONLY, TEXT_FILE)
+
+	for (nline=1;  getline (in, Memc[inbuf]) != EOF;  nline = nline + 1) {
+	    for (ip=inbuf;  IS_WHITE(Memc[ip]);  ip=ip+1)
+		;
+	    if (Memc[ip] == '#') {
+		# Pass comment lines on to the output unchanged.
+		call putline (STDOUT, Memc[inbuf])
+		next
+	    } else if (Memc[ip] == '\n' || Memc[ip] == EOS) {
+		# Blank lines too.
+		call putline (STDOUT, Memc[inbuf])
+		next
+	    }
+
+	    # Expand tabs into blanks, determine field offsets.
+	    call strdetab (Memc[inbuf], Memc[linebuf], SZ_LINE, TABSIZE)
+	    call lt_find_fields (Memc[linebuf], Memi[field_pos],
+		max_fields, nfields)
+
+	    if (xfield > nfields || yfield > nfields) {
+		call eprintf ("Not enough fields in file '%s', line %d\n")
+		    call pargstr (in_fname)
+		    call pargi (nline)
+		call putline (STDOUT, Memc[linebuf])
+		next
+	    }
+
+	    offset = Memi[field_pos + xfield-1]
+	    nchars = lt_get_num (Memc[linebuf+offset-1], x, nsdig_x)
+	    if (nchars == 0) {
+		call eprintf ("Bad x value in file '%s' at line %d:\n")
+		    call pargstr (in_fname)
+		    call pargi (nline)
+		call putline (STDOUT, Memc[linebuf])
+		next
+	    }
+
+	    offset = Memi[field_pos + yfield-1]
+	    nchars = lt_get_num (Memc[linebuf+offset-1], y, nsdig_y)
+	    if (nchars == 0) {
+		call eprintf ("Bad y value in file '%s' at line %d:\n")
+		    call pargstr (in_fname)
+		    call pargi (nline)
+		call putline (STDOUT, Memc[linebuf])
+		next
+	    }
+
+	    call lt_transform (x, y, xt, yt, tr)
+
+	    call lt_pack_line (Memc[linebuf], outbuf, SZ_LINE, Memi[field_pos],
+		nfields, xfield, yfield, xt, yt, nsdig_x, nsdig_y,
+		min_sigdigits)
+
+	    call putline (STDOUT, outbuf)
+	}
+
+	call sfree (sp)
+	call close (in)
+end
+
+
+# LT_FIND_FIELDS -- This procedure finds the starting column for each field
+# in the input line.  These column numbers are returned in the array
+# field_pos; the number of fields is also returned.
+
+procedure lt_find_fields (linebuf, field_pos, max_fields, nfields)
+
+char linebuf[SZ_LINE]
+int field_pos[max_fields], max_fields, nfields
+bool in_field
+int ip, field_num
+
+begin
+	field_num = 1
+	field_pos[1] = 1
+	in_field = false
+
+	for (ip=1;  linebuf[ip] != '\n' && linebuf[ip] != EOS;  ip=ip+1) {
+	    if (! IS_WHITE(linebuf[ip]))
+		in_field = true
+	    else if (in_field) {
+		in_field = false
+		field_num = field_num + 1
+		field_pos[field_num] = ip
+	    }
+	}
+
+	field_pos[field_num+1] = ip
+	nfields = field_num
+end
+
+
+# LT_GET_NUM -- The field entry is converted from character to double
+# in preparation for the transformation.  The number of significant
+# digits is counted and returned as an argument; the number of chars in
+# the number is returned as the function value.
+
+int procedure lt_get_num (linebuf, dval, nsdig)
+
+char linebuf[SZ_LINE]
+int nsdig
+double dval
+char ch
+int nchar, ip
+
+int gctod()
+
+begin
+	ip = 1
+	nsdig = 0
+	nchar = gctod (linebuf, ip, dval)
+	if (nchar == 0 || IS_INDEFD (dval))
+	    return (nchar)
+
+	# Skip leading white space.
+	ip = 1
+	repeat {
+	    ch = linebuf[ip]
+	    if (! IS_WHITE(ch))
+		break
+	    ip = ip + 1
+	}
+
+	# Count significant digits.
+	for (;  ! IS_WHITE(ch) && ch != '\n' && ch != EOS;  ch=linebuf[ip]) {
+	    if (IS_DIGIT (ch))
+		nsdig = nsdig + 1
+	    ip = ip + 1
+	}
+
+	return (nchar)
+end
+
+
+# LT_TRANSFORM -- The linear transformation is performed in this procedure.
+# First the coordinates are scaled, then rotated and translated.  The
+# transformed coordinates are returned.
+
+procedure lt_transform (x, y, xt, yt, tr)
+
+double x, y, xt, yt
+pointer tr
+double xtemp, ytemp
+
+begin
+	# Subtract off the current origin.
+	if (IS_INDEFD (x))
+	    xt = INDEFD
+	else
+	    xt = x - X1(tr)
+
+	if (IS_INDEFD (y))
+	    yt = INDEFD
+	else
+	    yt = y - Y1(tr)
+
+	# Scale and rotate the coordinates.
+	if (THETA(tr) == 0) {
+	    if (!IS_INDEFD (xt))
+		xt = xt * XSCALE(tr) + X2(tr)
+	    if (!IS_INDEFD (yt))
+		yt = yt * YSCALE(tr) + Y2(tr)
+	    return
+
+	} else if (IS_INDEFD(xt) || IS_INDEFD(yt)) {
+	    # A nonzero angle with either coordinate indefinite results in
+	    # both transformed coordinates being INDEFD.
+	    xt = INDEFD
+	    yt = INDEFD
+	    return
+	}
+
+	# Rotation for a nonzero angle with both coordinates defined.
+	xtemp = xt * XSCALE(tr)
+	ytemp = yt * YSCALE(tr)
+
+	xt = xtemp * COS_THETA(tr) - ytemp * SIN_THETA(tr)
+	yt = xtemp * SIN_THETA(tr) + ytemp * COS_THETA(tr)
+
+	# Now shift the rotated coordinates.
+	xt = xt + X2(tr)
+	yt = yt + Y2(tr)
+end
+
+
+# LT_PACK_LINE -- Fields are packed into the outbuf buffer.  Transformed
+# fields are converted to strings; other fields are copied from
+# the input line to the output buffer.
+
+procedure lt_pack_line (inbuf, outbuf, maxch, field_pos, nfields,
+	xfield, yfield, xt, yt, nsdig_x, nsdig_y, min_sigdigits)
+
+char inbuf[ARB], outbuf[maxch]
+int maxch, field_pos[ARB], nfields, xfield, yfield, nsdig_x, nsdig_y
+int min_sigdigits
+double xt, yt
+
+char field[SZ_LINE]
+int num_field, width, op
+
+int gstrcpy()
+
+begin
+	# Initialize the output pointer.
+	op = 1
+
+	do num_field = 1, nfields {
+	    width = field_pos[num_field + 1] - field_pos[num_field]
+
+	    if (num_field == xfield) {
+		call lt_format_field (xt, field, maxch, nsdig_x, width,
+		    min_sigdigits)
+	    } else if (num_field == yfield) {
+		call lt_format_field (yt, field, maxch, nsdig_y, width,
+		    min_sigdigits)
+	    } else {
+		# Put "width" characters from inbuf into field.
+		call strcpy (inbuf[field_pos[num_field]], field, width)
+	    }
+
+	    # Fields must be delimited by at least one blank.
+	    if (num_field > 1 && !IS_WHITE (field[1])) {
+		outbuf[op] = ' '
+		op = op + 1
+	    }
+
+	    # Copy "field" to the output buffer.
+	    op = op + gstrcpy (field, outbuf[op], maxch)
+	}
+
+	outbuf[op] = '\n'
+	outbuf[op+1] = EOS
+end
+
+
+# LT_FORMAT_FIELD -- A transformed coordinate is written into a string
+# buffer.  The output field is of (at least) the same width and significance
+# as the input list entry.
+
+procedure lt_format_field (dval, wordbuf, maxch, nsdig, width, min_sigdigits)
+
+char wordbuf[maxch]
+int width, nsdig, maxch, min_sigdigits
+double dval
+
+begin
+	call sprintf (wordbuf, maxch, "%*.*g")
+	    call pargi (width)
+	    call pargi (max (min_sigdigits, nsdig))
+	    call pargd (dval)
+end
diff --git a/pkg/bench/xctest/mkpkg b/pkg/bench/xctest/mkpkg
new file mode 100644
index 00000000..87b4c792
--- /dev/null
+++ b/pkg/bench/xctest/mkpkg
@@ -0,0 +1,25 @@
+# Make the LISTS package.
+
+$call relink
+$exit
+
+relink:
+	$set LIBS = "-lxtools"
+
+	$update libpkg.a
+	$omake x_lists.x
+	$link x_lists.o libpkg.a $(LIBS)
+	;
+
+clean:
+	$delete libpkg.a x_lists.o x_lists.e
+	;
+
+libpkg.a:
+	table.x <ctype.h>
+	words.x
+	tokens.x <ctotok.h>
+	unique.x
+	lintran.x <pattern.h> <ctype.h>
+	columns.x <ctype.h> <chars.h> <error.h>
+	;
diff --git a/pkg/bench/xctest/table.x b/pkg/bench/xctest/table.x
new file mode 100644
index 00000000..75e0a3e3
--- /dev/null
+++ b/pkg/bench/xctest/table.x
@@ -0,0 +1,111 @@
+# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
+
+include <ctype.h>
+
+# Read a list of strings from the standard input or a list of files and
+# assemble them into a nicely formatted table.  If reading from multiple
+# input files, make a separate table for each.  There is no fixed limit
+# on the size of the table which can be formatted.  The table is not
+# sorted; this should be done as a separate operation if desired.
+
+define INIT_STRBUF 512
+define STRBUF_INCREMENT 1024
+define INIT_MAXSTR 64
+define MAXSTR_INCREMENT 128
+
+
+procedure t_table()
+
+int list, first_col, last_col, ncols, maxstrlen
+int fd, nextch, nstrings, maxch, sz_strbuf, max_strings, ip
+pointer sp, strbuf, fname, stroff
+int strlen(), fscan(), nscan(), clpopni()
+int clgfil(), open(), envgeti(), clplen(), clgeti()
+
+begin
+	# Allocate buffers.  The string buffer "strbuf" and the associated
+	# list of offsets "stroff" will be reallocated later if they fill up.
+	call smark (sp)
+	call salloc (fname, SZ_FNAME, TY_CHAR)
+
+	call malloc (strbuf, INIT_STRBUF, TY_CHAR)
+	call malloc (stroff, INIT_MAXSTR, TY_INT)
+
+	# Get the various table formatting parameters from the CL.
+	ncols = clgeti ("ncols")
+	first_col = clgeti ("first_col")
+	last_col = clgeti ("last_col")
+
+	# If the user did not specify a valid "last_col", attempt to read the
+	# terminal x-dimension from the environment.  There is no good reason
+	# to abort if the environment variable cannot be found.
+	if (last_col == 0)
+	    iferr (last_col = envgeti ("ttyncols"))
+		last_col = 80
+
+	# Set the maximum string length to the size of an output line if no
+	# maximum length was given.
+	maxstrlen = clgeti ("maxstrlen")
+	if (maxstrlen == 0)
+	    maxch = last_col - first_col + 1
+	else
+	    maxch = min (maxstrlen, last_col - first_col + 1)
+
+	max_strings = INIT_MAXSTR
+	sz_strbuf = INIT_STRBUF
+
+	# Read the contents of each file into a big string buffer.  Print a
+	# separate table for each file.
+
+	list = clpopni ("input_files")
+
+	while (clgfil (list, Memc[fname], SZ_FNAME) != EOF) {
+	    fd = open (Memc[fname], READ_ONLY, TEXT_FILE)
+	    nextch = 1
+	    nstrings = 0
+
+	    # If printing several tables, label each with the name of the file.
+	    if (clplen (list) > 1) {
+		call printf ("\n==> %s <==\n")
+		    call pargstr (Memc[fname])
+	    }
+
+	    while (fscan (fd) != EOF) {
+		call gargstr (Memc[strbuf+nextch-1], maxch)
+		# Ignore blank lines and faulty scans.
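+		# (nscan() returns the number of garg* conversions completed
+		# since the last fscan(); zero here means gargstr() obtained
+		# nothing usable from the line.)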
+		if (nscan() == 0)
+		    next
+		for (ip=strbuf+nextch-1;  IS_WHITE (Memc[ip]);  ip=ip+1)
+		    ;
+		if (Memc[ip] == '\n' || Memc[ip] == EOS)
+		    next
+
+		# Save the one-indexed offset of this string for strtbl.
+		Memi[stroff+nstrings] = nextch
+		nextch = nextch + strlen (Memc[strbuf+nextch-1]) + 1
+
+		# Check the string buffer; make it bigger if necessary.
+		if (nextch + maxch >= sz_strbuf) {
+		    sz_strbuf = sz_strbuf + STRBUF_INCREMENT
+		    call realloc (strbuf, sz_strbuf, TY_CHAR)
+		}
+
+		# Add space for more string offsets if there are too many
+		# strings.  The offset buffer must be grown as soon as it
+		# fills, before the next offset can overflow it.
+		nstrings = nstrings + 1
+		if (nstrings >= max_strings) {
+		    max_strings = max_strings + MAXSTR_INCREMENT
+		    call realloc (stroff, max_strings, TY_INT)
+		}
+	    }
+	    call close (fd)
+
+	    # Print the table on the standard output.
+	    call strtbl (STDOUT, Memc[strbuf], Memi[stroff], nstrings,
+		first_col, last_col, maxch, ncols)
+	}
+
+	call clpcls (list)
+	call mfree (strbuf, TY_CHAR)
+	call mfree (stroff, TY_INT)
+	call sfree (sp)
+end
diff --git a/pkg/bench/xctest/tokens.x b/pkg/bench/xctest/tokens.x
new file mode 100644
index 00000000..c8793748
--- /dev/null
+++ b/pkg/bench/xctest/tokens.x
@@ -0,0 +1,140 @@
+# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
+
+include <ctotok.h>
+
+.help tokens
+.nf ___________________________________________________________________________
+TOKENS -- Break the input up into a series of tokens.  The makeup of the
+various tokens is defined by the FMTIO primitive ctotok, which is not very
+sophisticated, and does not claim to recognize the tokens of any particular
+language (though it does reasonably well for most modern languages).  Comments
+can be deleted if desired, and newlines may be passed on to the output as
+tokens.
+
+Comments are delimited by user specified strings.  Only strings which are also
+recognized by ctotok() as legal tokens may be used as comment delimiters.
+If newline marks the end of a comment, the end_comment string should be given
+as "eol".  Examples of acceptable comment conventions are ("#", eol),
+("/*", "*/"), ("{", "}"), and ("!", eol).  Fortran style comments ("^{c}",eol)
+can be stripped by filtering with match beforehand.
+
+Each token is passed to the output on a separate line.  Multiple newline
+tokens are compressed to a single token (a blank line).  If newline is not
+desired as an output token, it is considered whitespace and serves only to
+delimit tokens.
+.endhelp ______________________________________________________________________
+
+define SZ_COMDELIMSTR 20 # Comment delimiter string.
+
+procedure t_tokens()
+
+bool ignore_comments, comment_delimiter_is_eol
+bool in_comment, pass_newlines
+char begin_comment[SZ_COMDELIMSTR], end_comment[SZ_COMDELIMSTR]
+int fd, list, token, last_token, last_nscan
+pointer sp, fname, tokbuf, outstr, ip, op
+
+bool streq(), clgetb()
+int clpopni(), clgfil(), fscan(), nscan(), open(), ctocc()
+
+begin
+	call smark (sp)
+	call salloc (fname, SZ_FNAME, TY_CHAR)
+	call salloc (tokbuf, SZ_LINE, TY_CHAR)
+	call salloc (outstr, SZ_LINE, TY_CHAR)
+
+	# If comments are to be ignored, get the comment delimiters.
+	ignore_comments = clgetb ("ignore_comments")
+	if (ignore_comments) {
+	    call clgstr ("begin_comment", begin_comment, SZ_COMDELIMSTR)
+	    call clgstr ("end_comment", end_comment, SZ_COMDELIMSTR)
+	    comment_delimiter_is_eol = streq (end_comment, "eol")
+	} else {
+	    # Set begin_comment to the null string to ensure that we never
+	    # enter skip comment mode.  This requires that we check for the
+	    # EOS token before the begin_comment token below.
+	    begin_comment[1] = EOS
+	}
+
+	# Start outside of any comment.
+	in_comment = false
+
+	# Is newline a token?
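+	# (If not, newlines are treated as whitespace and serve only to
+	# delimit tokens; see the help text above.)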
+	pass_newlines = clgetb ("newlines")
+
+	# Merge all input files into a single stream of tokens on the
+	# standard output.
+	list = clpopni ("files")
+
+	while (clgfil (list, Memc[fname], SZ_FNAME) != EOF) {
+	    fd = open (Memc[fname], READ_ONLY, TEXT_FILE)
+	    last_token = NULL
+
+	    while (fscan (fd) != EOF) {
+		# Break the input line into a stream of tokens.
+		repeat {
+		    last_nscan = nscan()
+		    call gargtok (token, Memc[tokbuf], SZ_LINE)
+
+		    # If nscan() did not increment, the line has been
+		    # exhausted.  (This cannot actually happen with gargtok,
+		    # which returns a TOK_EOS token at the end of the line,
+		    # but the test is cheap insurance against a loop.)
+		    if (nscan() == last_nscan)
+			break
+
+		    # If busy ignoring a comment, check for the delimiter.
+		    if (in_comment) {
+			if (comment_delimiter_is_eol &&
+			    (token == TOK_NEWLINE || token == TOK_EOS)) {
+			    in_comment = false
+			    if (pass_newlines && last_token != TOK_NEWLINE) {
+				call printf ("\n")
+				last_token = TOK_NEWLINE
+			    }
+			    break
+			} else if (streq (Memc[tokbuf], end_comment)) {
+			    in_comment = false
+			    next
+			} else
+			    next
+		    }
+
+		    # If we get here, we are not processing a comment.
+
+		    if (token == TOK_NEWLINE) {
+			if (pass_newlines && last_token != TOK_NEWLINE)
+			    call printf ("\n")
+			last_token = TOK_NEWLINE
+			break
+
+		    } else if (token == TOK_EOS) {
+			# EOS is not counted as a token (do not set last_token,
+			# do not generate any output).
+			break
+
+		    } else if (streq (Memc[tokbuf], begin_comment)) {
+			in_comment = true
+			# Do not change last_token, since the comment token
+			# is to be ignored.
+			next
+
+		    } else if (token == TOK_STRING) {
+			# Convert control characters into printable
+			# sequences before printing the string token.
+			op = outstr
+			for (ip=tokbuf;  Memc[ip] != EOS;  ip=ip+1)
+			    op = op + ctocc (Memc[ip], Memc[op], SZ_LINE)
+			call printf ("\"%s\"\n")
+			    call pargstr (Memc[outstr])
+
+		    } else {		# most tokens
+			call printf ("%s\n")
+			    call pargstr (Memc[tokbuf])
+		    }
+
+		    last_token = token
+		}
+	    }
+	    call close (fd)
+	}
+
+	call clpcls (list)
+	call sfree (sp)
+end
diff --git a/pkg/bench/xctest/unique.x b/pkg/bench/xctest/unique.x
new file mode 100644
index 00000000..fcabfe00
--- /dev/null
+++ b/pkg/bench/xctest/unique.x
@@ -0,0 +1,46 @@
+# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
+
+# UNIQUE -- Pass only unique lines from the (presumably sorted) standard
+# input to the standard output.  In other words, if a sequence of identical
+# lines is found in the input, only one copy is passed to the output.
+
+procedure t_unique()
+
+int list, fd
+pointer sp, fname, old_line, new_line, temp
+bool streq()
+int getline(), clpopni(), clgfil(), clplen(), open()
+
+begin
+	call smark (sp)
+	call salloc (fname, SZ_FNAME, TY_CHAR)
+	call salloc (old_line, SZ_LINE, TY_CHAR)
+	call salloc (new_line, SZ_LINE, TY_CHAR)
+
+	list = clpopni ("files")
+
+	while (clgfil (list, Memc[fname], SZ_FNAME) != EOF) {
+	    fd = open (Memc[fname], READ_ONLY, TEXT_FILE)
+	    if (clplen (list) > 1) {
+		call printf ("\n\n==> %s <==\n")
+		    call pargstr (Memc[fname])
+	    }
+
+	    Memc[old_line] = EOS
+
+	    while (getline (fd, Memc[new_line]) != EOF) {
+		if (streq (Memc[old_line], Memc[new_line]))
+		    next
+		call putline (STDOUT, Memc[new_line])
+
+		# Swap the buffers, so that the line just output becomes
+		# the "old" line.
+		temp = old_line
+		old_line = new_line
+		new_line = temp
+	    }
+
+	    call close (fd)
+	}
+
+	call sfree (sp)
+end
diff --git a/pkg/bench/xctest/words.x b/pkg/bench/xctest/words.x
new file mode 100644
index 00000000..42f4f97e
--- /dev/null
+++ b/pkg/bench/xctest/words.x
@@ -0,0 +1,44 @@
+# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
+
+# WORDS -- Break the input up into a series of words or strings.  A word
+# is a sequence of characters delimited by whitespace or newline.  A string
+# is delimited by single or double quotes, and may not span more than a single
+# line.
+
+procedure t_words()
+
+int fd, list, last_nscan
+pointer sp, fname, word
+int clpopni(), clgfil(), fscan(), nscan(), open()
+
+begin
+	call smark (sp)
+	call salloc (fname, SZ_FNAME, TY_CHAR)
+	call salloc (word, SZ_LINE, TY_CHAR)
+
+	list = clpopni ("files")
+
+	while (clgfil (list, Memc[fname], SZ_FNAME) != EOF) {
+	    fd = open (Memc[fname], READ_ONLY, TEXT_FILE)
+
+	    # We do not know how many "words" there are on a line; get words
+	    # until there are no more.
+	    while (fscan (fd) != EOF)
+		repeat {
+		    # When nscan() does not increment after a call to
+		    # gargwrd(), we are all done.
+		    last_nscan = nscan()
+		    call gargwrd (Memc[word], SZ_LINE)
+		    if (nscan() > last_nscan) {
+			call printf ("%s\n")
+			    call pargstr (Memc[word])
+		    } else
+			break
+		}
+
+	    call close (fd)
+	}
+
+	call clpcls (list)
+	call sfree (sp)
+end
diff --git a/pkg/bench/xctest/x_lists.x b/pkg/bench/xctest/x_lists.x
new file mode 100644
index 00000000..01229e61
--- /dev/null
+++ b/pkg/bench/xctest/x_lists.x
@@ -0,0 +1,10 @@
+# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
+
+# Process configuration of the LISTS package.
+
+task table = t_table,
+	tokens = t_tokens,
+	unique = t_unique,
+	words = t_words,
+	lintran = t_lintran,
+	columns = t_columns
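+
+# (The task statement above generates the dispatcher for the compiled
+# executable x_lists.e; when the CL runs one of the named tasks, the
+# corresponding t_ procedure in this package is called.)
+#
+# A hypothetical session with the i/o benchmark tasks from x_bench.x above
+# (parameter names are those read by the tasks; the file sizes are
+# arbitrary, and the exact invocation depends on how the tasks have been
+# declared to the CL):
+#
+#	cl> wbin fname=scratch.dat filesize=10000000	# binary write
+#	cl> rbin fname=scratch.dat			# buffered binary read
+#	cl> rrbin fname=scratch.dat			# raw asynchronous read
+#	cl> wtext fname=scratch.txt filesize=1000000	# text file write
+#	cl> rtext fname=scratch.txt			# text file read
+#	cl> getpar niter=100				# IPC turnaround test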