aboutsummaryrefslogtreecommitdiff
path: root/sys/fio/doc
diff options
context:
space:
mode:
authorJoseph Hunkeler <jhunkeler@gmail.com>2015-07-08 20:46:52 -0400
committerJoseph Hunkeler <jhunkeler@gmail.com>2015-07-08 20:46:52 -0400
commitfa080de7afc95aa1c19a6e6fc0e0708ced2eadc4 (patch)
treebdda434976bc09c864f2e4fa6f16ba1952b1e555 /sys/fio/doc
downloadiraf-linux-fa080de7afc95aa1c19a6e6fc0e0708ced2eadc4.tar.gz
Initial commit
Diffstat (limited to 'sys/fio/doc')
-rw-r--r--sys/fio/doc/fio.hd54
-rw-r--r--sys/fio/doc/fio.hlp1912
-rw-r--r--sys/fio/doc/fio.men50
-rw-r--r--sys/fio/doc/vfn.hlp1028
4 files changed, 3044 insertions, 0 deletions
diff --git a/sys/fio/doc/fio.hd b/sys/fio/doc/fio.hd
new file mode 100644
index 00000000..08da85cc
--- /dev/null
+++ b/sys/fio/doc/fio.hd
@@ -0,0 +1,54 @@
+# Help directory for the FIO (file i/o) system package.
+
+$fio = "sys$fio/"
+
+access hlp = access.hlp, src = fio$access.x
+aread hlp = aread.hlp, src = fio$aread.x
+areadb hlp = areadb.hlp, src = fio$areadb.x
+await hlp = await.hlp, src = fio$await.x
+awaitb hlp = awaitb.hlp, src = fio$awaitb.x
+awrite hlp = awrite.hlp, src = fio$awrite.x
+awriteb hlp = awriteb.hlp, src = fio$awriteb.x
+close hlp = close.hlp, src = fio$close.x
+delete hlp = delete.hlp, src = fio$delete.x
+diropen hlp = diropen.hlp, src = fio$diropen.x
+falloc hlp = falloc.hlp, src = fio$falloc.x
+fcopy hlp = fcopy.hlp, src = fio$fcopy.x
+fdevbf hlp = fdevbf.hlp, src = fio$fdevbf.x
+fdevtx hlp = fdevtx.hlp, src = fio$fdevtx.x
+finfo hlp = finfo.hlp, src = fio$finfo.x
+flush hlp = flush.hlp, src = fio$flush.x
+fnextn hlp = fnextn.hlp, src = fio$fnextn.x
+fnldir hlp = fnldir.hlp, src = fio$fnldir.x
+fnroot hlp = fnroot.hlp, src = fio$fnroot.x
+fntcls hlp = fntgfn.hlp, src = fio$fntgfn.x
+fntclsb hlp = fntgfn.hlp, src = fio$fntgfn.x
+fntgfn hlp = fntgfn.hlp, src = fio$fntgfn.x
+fntgfnb hlp = fntgfn.hlp, src = fio$fntgfn.x
+fntlenb hlp = fntgfn.hlp, src = fio$fntgfn.x
+fntopn hlp = fntgfn.hlp, src = fio$fntgfn.x
+fntopnb hlp = fntgfn.hlp, src = fio$fntgfn.x
+fntrewb hlp = fntgfn.hlp, src = fio$fntgfn.x
+fopnbf hlp = fopnbf.hlp, src = fio$fopnbf.x
+fopntx hlp = fopntx.hlp, src = fio$fopntx.x
+fowner hlp = fowner.hlp, src = fio$fowner.x
+fpathname hlp = fpathname.hlp, src = fio$fpathname.x
+fseti hlp = fseti.hlp, src = fio$fseti.x
+fstati hlp = fstati.hlp, src = fio$fstati.x
+fstatl hlp = fstatl.hlp, src = fio$fstatl.x
+fstats hlp = fstats.hlp, src = fio$fstats.x
+getc hlp = getc.hlp, src = fio$getc.x
+getline hlp = getline.hlp, src = fio$getline.x
+mktemp hlp = mktemp.hlp, src = fio$mktemp.x
+note hlp = note.hlp, src = fio$note.x
+open hlp = open.hlp, src = fio$open.x
+protect hlp = protect.hlp, src = fio$protect.x
+putc hlp = putc.hlp, src = fio$putc.x
+putcc hlp = putcc.hlp, src = fio$putcc.x
+putline hlp = putline.hlp, src = fio$putline.x
+read hlp = read.hlp, src = fio$read.x
+rename hlp = rename.hlp, src = fio$rename.x
+reopen hlp = reopen.hlp, src = fio$reopen.x
+seek hlp = seek.hlp, src = fio$seek.x
+stropen hlp = stropen.hlp, src = fio$stropen.x
+write hlp = write.hlp, src = fio$write.x
diff --git a/sys/fio/doc/fio.hlp b/sys/fio/doc/fio.hlp
new file mode 100644
index 00000000..1f87049f
--- /dev/null
+++ b/sys/fio/doc/fio.hlp
@@ -0,0 +1,1912 @@
+
+.help fio Jan83 "File i/o Design Rev.5"
+.tp 30
+.sh
+STRUCTURE OF THE BASIC FILE I/O PROCEDURES
+
+ The high level FIO input procedures are GETC, GETLINE, and READ.
+These procedures read directly out of the "current buffer". When the
+buffer is exhausted, FILBUF is called to refill the buffer. The action
+taken by FILBUF depends on whether the file contains text or binary data,
+but does not depend on the characteristics of the device on which the
+file is resident. The output procedures are similar to the input
+procedures, except that FLSBUF is called to flush the buffer when it fills.
+
+
+
+
+.ks
+.nf
+ getc getline read
+
+
+
+ filbuf
+
+
+ text files binary files
+
+ zgettx fmkbfs ffault
+
+
+
+ Structure of the Input Procedures
+.fi
+.ke
+
+
+
+
+
+.ks
+.nf
+ putc putline write
+
+
+
+
+ flsbuf
+
+
+ text files binary files
+
+
+ zputtx fmkbfs ffault
+
+
+
+ Structure of the Output Procedures
+.fi
+.ke
+
+
+The "file fault" procedure (FFAULT) is called by both FILBUF and FLSBUF
+for binary files, when the referenced data lies outside the range of
+the current buffer.
+
+
+
+.ks
+.nf
+ ffault
+
+
+
+
+ ffilbf frelnk fflsbf
+
+
+
+
+ fwatio fbseek
+
+
+
+ aread await aseek/anote awrite
+
+
+
+ zaread zawait zseek/znote zawrite
+
+
+
+ FIO Structure for Accessing Binary Files
+.fi
+.ke
+
+
+In the above structure chart, the "z" routines at the lowest level
+are system and device dependent, and are actually part of the system
+interface, rather than FIO. A separate set of z-routines is required
+for each device serviced by FIO (regular binary files, the CL interface,
+pipes, magtapes, memory, etc.).
+
+All of the system and device dependence of FIO is concentrated
+into the z-routines. Only the routines AREAD, AWRITE, and AWAIT know
+that more than one type of binary device is serviced by FIO. Furthermore,
+FIO maintains a device table containing the entry point addresses of
+the z-routines for each device. This provides a clean interface to the
+device dependent routines, and makes it possible to add new devices
+without editing the source for FIO. In fact, it is possible to interface
+new devices to FIO dynamically, at run time.
+
+
+.tp 10
+.sh
+SEMICODE FOR THE BASIC FILE I/O PROCEDURES
+
+ The procedures GETC and PUTC read and write character data, a single
+character at a time. Since these procedures may be called once for each
+character in a file, they must be as efficient (ergo, simple) as feasible.
+The machine code for these routines should be hand optimized if much
+text processing (i.e. compilations) is anticipated.
+
+
+
+.nf
+.tp 5
+int procedure getc (fd, ch) # get character
+
+begin
+ if (iop < bufptr || iop >= itop) # buffer exhausted?
+ switch (filbuf(fd)) {
+ case EOF:
+ return (EOF)
+ case ERR:
+ take error action
+ }
+
+ ch = Mem[iop]
+ iop = iop + 1
+
+ return (ch)
+end
+
+
+
+.tp 5
+procedure putc (fd, ch) # put character
+
+begin
+ if (iop < bufptr || iop >= otop) { # buffer full?
+ if (flsbuf (fd) == ERR)
+ take error action
+ }
+
+ Mem[iop] = ch
+ iop = iop + 1
+
+ if (ch == newline) { # end of line?
+ if (flush on newline is enabled for this file)
+ if (flsbuf (fd) == ERR)
+ take error action
+ }
+end
+.fi
+
+
+Characters and strings (and even binary data) may be "pushed back" into
+the input stream. UNGETC pushes a single character. Subsequent calls
+to GETC, GETLINE, READ, etc. will read out the characters in the order
+in which they were pushed (first in, first out). When all of the
+pushback data has been read, reading resumes at the preceding file
+position, which may either be in one of the primary buffers, or an
+earlier state in the pushback buffer.
+
+UNGETS differs from UNGETC in that it pushes back whole strings,
+in a last in, first out fashion. UNGETS is used to implement recursive
+macro expansions. The amount of recursion permitted may be specified
+after the file is opened, and before any data is pushed back. Recursion
+is limited by the size of the input pointer stack, and pushback capacity
+by the size of the pushback buffer.
+
+
+.tp 5
+.nf
+procedure ungetc (fd, ch) # push back a character
+
+begin
+ if (iop < bufptr || iop >= otop) {
+ if (no pushback buffer)
+ create pushback buffer
+ else
+ error: "pushback buffer overflow"
+
+ stack old iop, itop
+
+ set iop to point at beginning of the pushback buffer,
+ set itop to iop, otop to top of pushback buffer.
+ }
+
+ Mem[iop] = ch
+ iop = iop + 1
+ itop = itop + 1
+end
+
+
+
+.tp 5
+procedure ungets (fd, str) # recursively push back a string
+
+begin
+ if (iop < bufptr || iop >= otop) {
+ if (no pushback buffer) {
+ create pushback buffer
+ setup iop, buftop for pushback buffer
+ } else
+ error: "pushback buffer overflow"
+ }
+
+ stack old iop, itop
+ copy string to Mem[iop], advance iop
+ itop = iop
+end
+.fi
+
+
+
+Calls to GETLINE may be intermixed with calls to GETC, READ, and so on.
+If, however, only GETLINE is used to access a file, and the associated
+file is a text file, a file buffer will never need to be created (the
+data will be placed directly in the user buffer instead).
+If a buffer has been created and is not yet empty, GETLINE will read the
+remainder of the current line from that buffer, before again calling FILBUF.
+
+The newline character is returned as part of the line. The maximum size
+of a line (size of a line buffer) is set at compile time by the system
+wide constant SZ_LINE. The constant SZ_LINE includes space for the newline
+character, but not for the EOS marker (character array dimensions never
+include space for the EOS, because the preprocessor automatically allows an
+extra character for the EOS when dimensioning the array for Fortran).
+.nf
+
+
+.tp 5
+int procedure getline (fd, linebuf) # get a line from file
+
+begin
+ op = 1
+ if (buffer is empty and file type is TEXT_FILE) {
+ # call ZGETTX to copy line directly into user linebuf
+ zgettx (channel(fd), linebuf, status)
+
+ } else {
+ while (op <= SZ_LINE) {
+ if (iop < bufptr || iop >= itop) {
+ status = filbuf (fd)
+ if (status == ERR || status == EOF)
+ break
+ }
+
+ linebuf[op] = Mem[iop]
+ iop = iop + 1
+ op = op + 1
+
+ if (the character was newline)
+ break
+ }
+ linebuf[op] = EOS
+ }
+
+ if (status == ERR)
+ take error action
+ else if (op == 1)
+ return (EOF)
+ else
+ return (op - 1) # number of chars
+end
+
+
+
+
+.tp 5
+procedure putline (fd, linebuf) # put a line to file
+
+begin
+ for (i=1; linebuf[i] != EOS; i=i+1) {
+ if (iop < bufptr || iop >= otop) {
+ if (flsbuf (fd) == ERR)
+ take error action
+ }
+
+ Mem[iop] = linebuf[i]
+ iop = iop + 1
+
+ if (the character is newline) {
+ if (flush on newline is enabled)
+ if (flsbuf (fd) == ERR)
+ take error action
+ }
+ }
+end
+
+
+
+.fi
+The READ procedure reads a maximum of MAXCHARS characters from the file
+FD into the user supplied buffer BUFFER. In the case of block structured
+devices, READ will continue to read blocks from the file until the output
+buffer has filled. In the case of record structured devices (i.e., terminals,
+text files, pipes) READ will read at most one record, after exhausting the
+contents of the file buffer.
+
+
+
+.tp 5
+.nf
+int procedure read (fd, buffer, maxchars)
+
+begin
+ check that fd is a valid file opened for reading
+ nchars = 0
+
+ while (nchars <= maxchars) {
+ if (iop < bufptr || iop >= itop) {
+ switch (filbuf(fd)) {
+ case EOF:
+ break
+ case ERR:
+ take error action
+ default:
+ # don't loop if record structured device or EOF
+ if (nchars read != buffer size)
+ maxchars = min (maxchars, nchars + nchars read)
+ }
+ }
+ chunk = min (maxchars - nchars, itop - iop)
+ if (chunk <= 0)
+ break
+ else {
+ amovc (Memc[iop], buffer[nchars+1], chunk)
+ iop = iop + chunk
+ nchars = nchars + chunk
+ }
+ }
+
+ if (nchars == 0)
+ return (EOF)
+ else
+ return (nchars)
+end
+
+
+
+
+.tp 5
+procedure write (fd, buffer, maxchars)
+
+begin
+ check that fd is a valid file opened for writing
+ nchars = 0
+
+ while (nchars <= maxchars) {
+ if (iop < bufptr || iop >= otop) {
+ if (flsbuf (fd) == ERR)
+ take error action
+ }
+ chunk = min (maxchars - nchars, otop - iop)
+ if (chunk <= 0)
+ break
+ else {
+ amovc (buffer[nchars+1], Mem[iop], chunk)
+ iop = iop + chunk
+ nchars = nchars + chunk
+ }
+ }
+end
+
+
+
+
+.tp 5
+int procedure filbuf (fd)
+
+begin
+ verify fd: file open with read permission
+
+ if (iop points into pushback buffer) {
+ pop state off pushback stack
+ return (itop - bufptr)
+ # eventually end up back in a real file buffer
+ } else if (no buffers) {
+ call fmkbfs to allocate buffer space for the file
+ # fmkbfs must adjust iop to reflect current file position
+ }
+
+ if (TEXT_FILE)
+ zgettx (fd, file_buffer, nchars)
+ else
+ nchars = ffault (fd, logical_offset_in_file)
+
+ iop = bufptr
+ itop = max (bufptr, bufptr + nchars)
+ otop = bufptr
+
+ return (nchars)
+end
+
+
+
+
+.tp 5
+int procedure flsbuf (fd)
+
+begin
+ verify fd: file open with write permission
+ if (no buffers)
+ call fmkbfs to allocate buffer space
+
+ if (otop == bufptr) {
+ set otop to top of buffer
+ status = OK
+ } else if (TEXT_FILE) {
+ zputtx (channel[fd], file_buffer, status)
+ reset iop to start of buffer
+ } else {
+ status = ffault (fd, logical_offset)
+ }
+
+ return (status)
+end
+.fi
+
+
+.sh
+Buffer Management for Binary Files
+
+ FIO maintains a "current buffer" for each file. A "file pointer"
+is also maintained for each file. The file pointer is the character offset
+within the file at which the next i/o transfer will occur. When the file
+pointer no longer points into the current buffer, a "file fault" occurs.
+The file pointer is modified when, and only when, an i/o transfer or seek
+occurs.
+
+All i/o to binary files is routed through FFAULT. FILBUF and FLSBUF handle
+i/o to text files directly.
+
+FFAULT makes a binary file appear to be a contiguous array (stream) of
+characters, regardless of the device on which the file is resident, and
+regardless of the block size. Image i/o and structure i/o depend on the
+buffer management capabilities of FFAULT for efficient i/o.
+
+FFAULT must be able to deal with variable block size devices. The block
+size is a run time variable, which is device dependent.
+Magtapes and Mem files, for example, have a block size of one char,
+whereas most disks have 256 char blocks (assuming two machine bytes per char).
+
+Image i/o requires that the number and size of the buffers for a file
+be variable, and that asynchronous i/o be possible. The size of a
+buffer, and the size of the data segment to be read in (normally one
+row in the case of two dimensional imagefiles) need not be the same.
+
+Structure or virtual i/o is based on a global pool of buffers, shared
+amongst all the files currently mapped for virtual i/o. Each buffer
+in the pool is always linked into the list for the global pool, and is
+also linked into the local list for a file, when containing data from
+that file. New buffers are allocated from the tail of the global list.
+
+The virtual i/o primitives interface to file i/o via READ and WRITE
+requests on a mapped file. FFAULT is required to manage the global pool
+properly when faulting on a mapped file. The number and size of the
+buffers in the global pool are run time variables.
+
+FFAULT calculates the file offset of the new buffer implied by the offset
+argument (note that offset may be BOF or EOF as well as a real offset).
+No actual i/o takes place if the data is already buffered.
+
+
+
+.tp 5
+.nf
+int procedure ffault (fd, char_offset)
+
+fd: file descriptor number
+char_offset: desired char offset in file
+
+begin
+ calculate buffer_offset (modulus block size)
+ if (i/o in progress on file fd)
+ wait for completion (fwatio)
+
+ if (buffer is already in local pool)
+ relink buffer at head of list (frelnk)
+ else {
+ if (buffer has been written into)
+ flush to file (fflsbf)
+ relink next buffer at head of lists (frelnk)
+ set buffer offset for new buffer
+ fill buffer from file (ffilbf)
+ }
+
+ if (file is being accessed sequentially)
+ initiate write behind or read ahead
+
+ set iop corresponding to desired char_offset
+ return (status: OK, ERR, or EOF)
+end
+.fi
+
+
+.sh
+Verification of the File Fault Procedure
+
+ The database managed by FFAULT consists of the local and global
+buffer lists, and the file descriptor structure. The major types of
+file access are (1) sequential read, (2) write at EOF, (3) random
+access, and (4) sequential write not at EOF. A mode change may occur
+at any time. In what follows, we follow the logic of FFAULT through
+for these four modes of access, to verify that FFAULT works properly
+in each case.
+
+.tp 4
+.ls 4 Case 1: Sequential Read
+
+FFAULT will detect the sequential nature of the read requests, and will
+begin reading ahead asynchronously. No writing occurs, since the buffer
+is never written into. If a buffer were to be written into, the subsequent
+write i/o operation would cause read ahead to be interrupted for a time
+(random mode would be asserted temporarily).
+
+.ks
+.nf
+ normally, read ahead will be in progress
+ wait for i/o
+ buffer is now in pool
+ relink buffer at head of lists
+ initiate i/o on next available buffer
+
+ when EOF is detected, buffer is zeroed, EOF is returned
+.fi
+.ke
+.le
+
+.tp 4
+.ls Case 2: Sequential Write at EOF
+
+When writing at EOF, FFAULT will detect the fact that the writes are
+occurring sequentially, and will start flushing the newly filled buffers
+asynchronously. Read ahead does not occur, since the file is positioned
+at EOF.
+
+.ks
+.nf
+ normally, write behind will be in progress
+ wait for i/o
+ get next buffer (will not need to be flushed, due to
+ automatic write behind)
+ relink buffer at head of lists
+ fill buffer (no actual file access when at EOF)
+ initiate write behind of most recent buffer
+.fi
+.ke
+.le
+
+.tp 4
+.ls Case 3: Random Access
+
+Old buffer is left in pool. No i/o is done on the old buffer, regardless
+of whether the old buffer has been written into or not (unless there is only
+one buffer in the pool). The buffer pool serves as a cache, with the buffers
+linked in order of most recent access. Read ahead and write behind do not
+occur as long as the pattern of access remains random.
+
+.ks
+.nf
+ no i/o in progress
+ buffer not in pool
+ take buffer from tail of list
+ relink buffer at head of lists
+ if (buffer needs to be flushed)
+ flush it, wait for completion
+ fill buffer
+.fi
+.ke
+.le
+
+.tp 4
+.ls Case 4: Sequential Write not at EOF
+
+This mode differs from write at EOF in that read and write i/o operations
+are interspersed. Since only one i/o operation can be in effect on a
+given file at one time, we cannot both read ahead and write behind.
+Write behind will occur, but reading will not be asynchronous.
+
+.ks
+.nf
+ wait for i/o
+ buffer not in pool
+ take buffer from tail of list
+ relink buffer at head of lists
+ buffer will not need to be flushed, due to write behind
+ fill buffer, wait for completion
+ initiate write behind of most recent buffer
+.fi
+.ke
+.le
+
+
+
+
+.fi
+In certain circumstances, such as when IMIO overwrites a line of an
+image, where each line is known to be aligned on a block boundary,
+the "fill buffer" operation can be omitted (since it is guaranteed
+that the entire contents of the buffer will be overwritten before the
+buffer is flushed). The fill buffer operation is disabled via an FSET
+option. Both access modes 3 and 4 are affected, yielding a factor
+of two reduction in the number of i/o transfers.
+
+
+
+
+.tp 5
+.nf
+procedure ffilbf (fd, bufdes)
+
+fd: file descriptor number
+bufdes: buffer descriptor
+
+begin
+ if (at EOF)
+ return
+ else {
+ if (io in progress on file fd)
+ call fwatio to wait for completion of transfer
+ fbseek (fd, bufdes)
+ aread (fd, Memc[bufptr], buffer_size)
+
+ set i/o mode word in buffer descriptor
+ set pointer to active buffer in file descriptor
+ }
+end
+
+
+
+.fi
+The FFLSBF routine is called by FFAULT to actually flush a buffer to
+the file. Note that if the buffer is at the end of the file, and the
+buffer is only partially full, a partially full block will be written.
+If partial file blocks are not permitted by the underlying system,
+the z-routine must compensate.
+
+
+
+.tp 6
+.nf
+procedure fflsbf (fd, bufdes)
+
+fd: file descriptor number
+bufdes: buffer descriptor
+
+begin
+ if (no write permission on file)
+ take error action
+ if (io in progress on file fd)
+ call fwatio to wait for completion of transfer
+
+ nchars = max (iop, itop) - bufptr
+ fbseek (fd, bufdes)
+ awrite (fd, Memc[bufptr], nchars)
+
+ set i/o mode word in buffer descriptor
+ set pointer to active buffer in file descriptor
+end
+
+
+
+
+.tp 5
+procedure fwatio (fd)
+
+begin
+ if (i/o mode == NULL)
+ return
+ nchars = await (fd)
+
+ if (nchars == ERR)
+ set ERROR bit in status word
+ else {
+ # set i/o pointers in buffer descriptor
+ if (i/o mode == READ_IN_PROGRESS)
+ itop = bufptr + nchars
+ else
+ # don't change itop, data still valid
+ otop = bufptr
+ clear i/o mode word in buffer descriptor
+ clear pointer to active buffer in file descriptor
+ }
+end
+
+
+
+
+.tp 5
+procedure fbseek (fd, bufdes)
+
+begin
+ if (current_offset != buffer_offset)
+ aseek (fd, buffer_offset)
+end
+
+
+
+
+.fi
+SEEK is used to move the file pointer (offset in a file at which the
+next data transfer will occur). With text files, one can only seek
+to the start of a line, the position of which must have been determined
+by a prior call to NOTE. For binary files, SEEK merely sets the logical
+offset within the file. This will usually cause a file fault when the
+next i/o transfer occurs. An actual physical seek does not occur until
+the fault occurs.
+
+The logical offset is the character offset in the file at which the next
+i/o transfer will occur. In general, there is no simple relationship
+between the logical offset and the actual physical offset in the file.
+The physical offset is the file offset at which the next AREAD or AWRITE
+transfer will occur, and is maintained by those routines and by the system.
+The logical offset may be set to any character in a file. The physical
+offset is always a multiple of the device block size.
+
+The logical offset is defined at all times by the offset of the current
+buffer (buf_offset), and by the offset within the buffer (iop-bufptr).
+The logical offset may take on the special values BOF and EOF.
+Since the offset of the first character in a file is one (1),
+and BOF and EOF are zero or negative, the special offsets are unambiguous.
+
+.rj (logical offset)
+ new iop = offset - buf_offset + bufptr
+
+A logical seek on a binary file is effected merely by setting the in-buffer
+pointer IOP according to the relation shown above. A macro LSEEK (fd, offset)
+is defined to perform a logical seek with inline code.
+.nf
+
+
+
+.tp 5
+procedure seek (fd, offset)
+
+begin
+ verify that fd is a legal file descriptor of an open file
+ clear any pushback
+
+ # make newly written data readable
+ itop = max (itop, iop)
+
+ if (TEXT_FILE) {
+ if (buffer has been written into)
+ call zputtx to flush buffer to file
+ reset iop to beginning of buffer
+ if (offset is not equal to offset of buffer)
+ call zsektx routine to seek on text file
+ } else
+ lseek (fd, offset)
+end
+
+
+
+
+.tp 5
+long procedure note (fd) # note file position for later seek
+
+begin
+ verify that fd is a legal file descriptor of an open file
+
+ if (TEXT_FILE) {
+ call znottx to get offset into text file
+ if (a buffer is in use)
+ save offset of buffer in buffer descriptor
+ return (offset)
+ } else
+ return (logical offset)
+end
+
+
+
+
+.tp 5
+procedure flush (fd)
+
+begin
+ verify fd: file open with write permission
+
+ if (TEXT_FILE)
+ if (buffer has been written into) {
+ call zputtx to write out buffer
+ reset buffer pointers
+ }
+ else
+ for (each buffer in local pool)
+ if (buffer has been written into)
+ call fflsbf to flush buffer
+end
+
+
+
+
+.fi
+The asynchronous i/o primitives ZAREAD and ZAWRIT must enforce device block
+boundaries. Thus, if maxchars is not an integral multiple of the block size,
+the file pointer will nonetheless be advanced to the next block boundary.
+Some files (such as Mem files and magtapes) may have a block size of one char.
+
+Note that memory may be accessed as a "file". This facility is most often
+used by the formatted i/o routines, to decode and encode character data in
+strings. On a virtual memory machine, an entire binary file could be mapped
+into memory, then opened with MEMOPEN as a memory resident file (this would
+in effect replace the FFAULT file faults by hardware page faults).
+
+The calling program is required to call AWAIT after an AREAD or AWRITE call to
+a file, before issuing the next i/o request to that file. Failure to do so
+causes an error action to be taken. This is done to ensure that the success
+or failure of the i/o transfer (the status returned by AWAIT) is checked by
+the calling program.
+
+The z-routines ZCALL2 and ZCALL3 are machine dependent routines which
+call the procedure whose entry point address is given as the first argument.
+The numeric suffix N means that the procedure given as the first argument is
+to be called with N arguments, the values of which make up the remaining
+arguments to ZCALL. The additional machine dependence of this routine
+is thought to be more than justified by the clean, flexible interface
+which it provides between FIO and the various supported devices.
+.nf
+
+
+
+.tp 5
+procedure aread (fd, buffer, maxchars)
+
+begin
+ check that fd is a valid file opened for reading
+ if (i/o is already in progress on file fd)
+ error: "i/o already in progress"
+ set read_in_progress word in file descriptor
+
+ zcall3 (zaread[fd], channel[fd], buffer, maxchars)
+end
+
+
+
+.fi
+Note that FIO, when it seeks to the end of a file for a buffered binary
+write, actually seeks to the nearest block boundary preceding the physical
+EOF which is an integral multiple of the file buffer size. When the file
+buffer fills, it is flushed out, OVERWRITING THE EOF. This may pose problems
+for the implementor of the ZAWRITE routine on some systems.
+
+
+
+.tp 5
+.nf
+procedure awrite (fd, buffer, maxchars)
+
+begin
+ check that fd is a valid file opened for writing
+ if (i/o is already in progress on file fd)
+ error: "i/o already in progress"
+ set write_in_progress in i/o mode word in file descriptor
+
+ zcall3 (zawrite[fd], channel[fd], buffer, maxchars)
+end
+
+
+
+
+.tp 5
+int procedure await (fd)
+
+begin
+ verify that fd is a legal file descriptor of an open file
+
+ if (bad error code in file descriptor)
+ set status to ERR
+ else if (no io in progress on file fd)
+ return (0)
+ else
+ zcall2 (zawait[fd], channel[fd], status)
+
+ switch (status) {
+ case ERR:
+ set error code in file descriptor
+ case EOF:
+ set EOF flag
+ default:
+ increment file position counter by N file blocks
+ set nchars_last_transfer in file descriptor
+ }
+
+ clear io_in_progress word in file descriptor
+ return (status)
+end
+
+
+
+
+.tp 5
+procedure aseek (fd, offset)
+
+begin
+ switch (offset) {
+ case BOF:
+ char_offset = 1
+ clear at EOF flag
+ case EOF:
+ if (already at EOF)
+ return
+ else {
+ zcall2 (zaseek[fd], channel[fd], EOF)
+ current_offset = anote (fd)
+ char_offset = current_offset
+ set at EOF flag
+ }
+ default:
+ char_offset = offset
+ clear at EOF flag
+ }
+
+ # can seek only to the beginning of a device block
+ block_offset = char_offset - mod (char_offset-1, block_size)
+
+ zcall2 (zaseek[fd], channel[fd], block_offset)
+ if (anote(fd) != block_offset)
+ take error action
+end
+
+
+
+.tp 5
+long procedure anote (fd)
+
+begin
+ zcall2 (zanote[fd], channel[fd], current_offset)
+ return (current_offset)
+end
+.fi
+
+
+
+.sh
+Z-ROUTINES REQUIRED TO INTERFACE TO A BINARY DEVICE
+
+ The interface between FIO and a binary device is defined by a set of
+six so called z-routines. These routines may be as device and system
+dependent as necessary, provided the standard calling sequences and semantics
+are implemented.
+
+The following z-routines are required for each device serviced by FIO.
+Since only the entry point addresses are given to FIO, the actual names
+are arbitrary, but must be distinct to avoid collisions. The names shown
+are reserved.
+
+.ks
+.nf
+ zaread (channel, buffer, maxchars)
+ zawrit (channel, buffer, maxchars)
+ zawait (channel, nchars/EOF/ERR)
+ zaseek (channel, char_offset/BOF/EOF)
+ zanote (channel, char_offset)
+ zblksz (channel, device_block_size_in_chars)
+.fi
+.ke
+
+The exact specifications of these routines will be detailed in the system
+interface documentation.
+
+
+The following binary devices are fully supported by the program interface:
+
+
+.ks
+.nf
+ device type initialization
+
+ regular random access binary files OPEN
+ the CL interface (STDIN,STDOUT,...) task startup
+ pipes CONNECT
+ memory MEMOPEN
+ magnetic tapes MTOPEN
+ graphics devices GOPEN
+.fi
+.ke
+
+
+A new device may be interfaced to FIO at run time with the procedure FIODEV.
+Repetitive calls to FIODEV for the same device are harmless and are
+ignored. The maximum number of devices that may be interfaced to FIO is set
+when FIO is compiled. An error action will occur if this number is exceeded.
+
+ fiodev (zaread, zawrit, zawait, zaseek, zanote, zblksz)
+
+The purpose of FIODEV is to make the entry points of the z-routines for the
+new device known to FIO. The device table is indexed by the entry point
+address of the ZAREAD procedure, which must therefore be distinct for each
+device.
+
+A default device is associated with a file when the file is opened.
+To specify a device other than the default device requires a call to FSET,
+passing the entry point address of the ZAREAD procedure for the device.
+The device must have been installed with the FIODEV call by the time FSET
+is called to associate the device with a particular file, or an error action
+will result.
+
+
+.sh
+SEMICODE FOR THE FIO INITIALIZATION AND CONTROL PROCEDURES
+
+ Before any i/o can be done on a file, the file must be opened. The
+standard OPEN procedure may be used to access ordinary files containing either
+text or binary data. To access a file on one of the special devices, a special
+open procedure must be used (MEMOPEN, MTOPEN, ..).
+
+All file open procedures are alike in that they call the FIO routine
+FGETFD to allocate and initialize (with defaults) a file descriptor.
+Assorted calls to FSET and possibly FIODEV may optionally follow,
+if the default file parameters are not applicable to the device in question.
+
+
+
+
+.ks
+.nf
+ open close
+
+
+
+
+ fgetfd frtnfd flush
+
+
+
+
+ zmapfn zopen malloc mfree zclose
+
+
+
+ Structure of the Initialization Procedures
+.fi
+.ke
+
+
+
+
+.tp 5
+.nf
+int procedure open (file, mode, type)
+
+file: file name (EOS terminated character string)
+mode: type of access permission desired
+type: file type (text or binary)
+
+begin
+ # allocate and initialize file descriptor
+ fd = fgetfd (file, mode, type)
+ if (fd == ERR) {
+ set error code in file descriptor
+ return (ERR)
+ }
+
+ # map virtual file name to OS file name
+ zmapfn (file, osfname, SZ_OSFNAME)
+
+ switch (type) { # open file
+ case TEXT_FILE:
+ zopntx (osfname, mode, channel[fd])
+ case BINARY_FILE:
+ zopenb (osfname, mode, channel[fd])
+ default:
+ set error code in file descriptor
+ channel[fd] = ERR
+ }
+
+ if (channel[fd] == ERR) {
+ frtnfd (fd) # return file descriptor
+ return (ERR)
+ } else
+ return (fd)
+end
+
+
+
+.fi
+To conserve resources (file descriptors, buffer space) a file should be
+closed when no longer needed. Any file buffers that may have been
+created and written into will be flushed before being deallocated.
+
+CLOSE ignores any attempts to close STDIN or CLIN. Attempts to close
+STDOUT, STDERR, or CLOUT cause the respective output byte stream to be
+flushed, but are otherwise ignored. An error action results if one
+attempts to close a file which is not open, or if one attempts to close
+a file which was not opened with OPEN.
+.nf
+
+
+
+.tp 5
+procedure close (fd) # close an opened file
+
+begin
+ if (fd == STDIN || fd == CLIN) {
+ return
+ } else if (fd == STDOUT || fd == STDERR || fd == CLOUT) {
+ flush (fd)
+ return
+ } else if (fd is not a valid file descriptor of an open file) {
+ take error action
+ } else if (file device is not a standard one)
+ take error action
+
+ flush (fd)
+ zclose (channel[fd])
+ frtnfd (fd)
+end
+
+
+
+
+
+.tp 5
+int procedure fgetfd (file, mode, type) # get file descriptor
+
+file: file name (EOS terminated character string)
+mode: type of access permission desired
+type: file type (text or binary)
+
+begin
+ # find an unused file descriptor slot
+ for (fd=FIRST_FD; fd <= LAST_FD; fd=fd+1)
+ if (fdes[fd] == NULL)
+ break
+ if (fd > LAST_FD)
+ return (ERR)
+
+ # allocate memory for file descriptor proper
+ fdes[fd] = malloc (sizeof_struct_fiodes, TY_CHAR)
+ if (fdes[fd] == NULL)
+ return (ERR)
+
+ initialize fields of file descriptor to default values
+ return (fd)
+end
+
+
+
+
+.tp 5
+procedure frtnfd (fd) # return file descriptor and buffers
+
+begin
+ if (fdes[fd] == NULL)
+ return
+
+ # deallocate file buffers, if any
+
+ if (file takes its buffers from the global pool) {
+ if (any buffers were actually ever allocated)
+ decrement reference count of files using global pool
+ for (each buffer in the local list) {
+ unlink buffer from the local list
+ if (global pool reference count is zero) {
+ unlink buffer from the global list
+ return buffer space to the system
+ } else
+ link at tail of the global list
+ }
+ } else
+ for (each buffer in the local list) {
+ unlink buffer from the local list
+ return buffer space to the system
+ }
+
+ if (push back buffer exists)
+ return push-back buffer
+
+ mfree (fdes[fd], TY_CHAR)
+ fdes[fd] = NULL
+end
+.fi
+
+
+.sh
+SETTING AND INSPECTING THE FIO CONTROL PARAMETERS
+
+ Any file may be accessed after specifying only the file name, access
+mode, and file type parameters in the OPEN call.
+Occasionally, however, it is desirable to change the default file control
+parameters, to optimize i/o to the file. The IMIO and VSIO interfaces,
+for example, control the size, number, and ownership of the FIO file buffers.
+
+
+.ks
+.nf
+ fset (fd, parameter, value)
+ value = fget (fd, parameter)
+.fi
+.ke
+
+
+The FSET procedure is used to set the FIO parameters for a particular file,
+while FGET is used to inspect the values of these parameters. The special
+value DEFAULT will restore the default value of the indicated parameter.
+The following parameters are defined:
+
+.ls 4
+.ls 15 ADVICE
+This parameter is used to advise FIO on the type of access expected for
+the file. The legal values are SEQUENTIAL and RANDOM. Given such advice,
+FIO will set up the buffers for the file using system dependent defaults
+for the buffer types, sizes, and numbers. Setting ADVICE is more system
+independent than explicitly setting NBUFFERS, BUF_SIZE, and so on.
+.le
+.ls ASYNC_IO
+If enabled (value = YES), and there are two or more buffers in the pool,
+FIO will employ read ahead and early write behind when a sequential pattern
+of i/o is detected. Specifying NO for this parameter guarantees that
+buffered data will be retained until reuse of a buffer is forced by a fault.
+Note that even if ASYNC_IO is enabled, read ahead and early write behind
+are ONLY used while the pattern of i/o remains sequential.
+.le
+.ls BUF_SIZE
+The size of a file buffer, in chars. The actual size of the buffer
+created and used by FIO depends on the device block size and may be larger
+than BUF_SIZE, but will not be any smaller.
+.le
+.ls BUF_TYPE
+This parameter may have one of two values, LOCAL or GLOBAL, specifying whether
+a local pool of buffers is to be created, or whether buffers are to be drawn
+from the global pool.
+.le
+.ls FIO_DEVICE
+The value given must be the entry point address of the ZAREAD procedure
+for the desired device. The device must have been installed in the FIO
+device table by a prior call to FIODEV.
+.le
+.ls FLUSH_NL
+If enabled, the output buffer will be flushed after every line of output text,
+rather than when the buffer fills or when a flush is otherwise forced.
+Useful when the output file is an interactive terminal.
+.le
+.ls GBUF_SIZE
+The size of a buffer in the global pool, in chars.
+The FD parameter is ignored.
+.le
+.ls GNBUFFERS
+The number of file buffers in the global pool.
+The FD parameter is ignored.
+.le
+.ls NBUFFERS
+The number of file buffers in the local pool.
+.le
+.ls PBB_SIZE
+The size of the combined push back buffer and push back control stack area,
+in chars.
+.le
+.le
+
+
+The parameters controlling the size and number of the various buffers
+(ADVICE, NBUFFERS, BUF_SIZE, BUF_TYPE, PBB_SIZE, GNBUFFERS, GBUF_SIZE) must
+be set before i/o causes the affected buffers to be created using the default
+number and size parameters. Thereafter, FSET calls to change these parameters
+will be ignored. The values of the other parameters may be changed at any
+time, with the new values taking effect immediately.
+
+.sh
+Example 1: File access is expected to be highly random.
+
+ The most system independent approach is to call FSET to set the
+ADVICE parameter to RANDOM.
+
+
+.nf
+ include <fio.h>
+ ...
+
+ fd = open (file, READ_WRITE, BINARY_FILE)
+ if (fd == ERR)
+ ...
+
+ call fset (fd, ADVICE, RANDOM)
+.fi
+
+.sh
+Example 2: High speed sequential access is desired
+
+ In this case, the best approach would again be to call FSET to set ADVICE
+to SEQUENTIAL. To demonstrate use of some of the other parameters, we have
+taken a different approach here.
+
+
+.nf
+ fd = open (file, READ_ONLY, BINARY_FILE)
+ if (fd == ERR)
+ ...
+
+ call fset (fd, NBUFFERS, 2)
+ call fset (fd, BUF_SIZE, SZ_BLOCK * 16)
+ call fset (fd, ASYNC_IO, YES)
+.fi
+
+
+In practice it will rarely be necessary for the user to call FSET, because
+the facilities provided by VSIO and IMIO (which do call FSET in the manner
+shown) will probably provide the desired i/o capability, without need to
+resort to the comparatively low level facilities provided by FIO.
+Another reason for NOT calling FSET is that the system provided defaults
+may indeed be best for the system on which the software is being run.
+
+The default values selected for the FIO parameters may be tuned to the
+particular system. At one extreme, for example, we might provide a global
+pool containing only two buffers, each the size of a single disk block.
+By default, all files would share these buffers, and asynchronous i/o
+would be disabled. This would be the minimum memory configuration.
+At the other extreme, we might allocate two large buffers to each file,
+with asynchronous i/o enabled.
+
+
+.sh
+DETAILS OF THE FIO DATA STRUCTURES
+
+ By this point we have sufficiently detailed information about the
+functioning of FIO to be able to fill in the details of the data
+structures. The FIO database consists of the MAXFD file descriptors,
+the global buffer pool, the descriptor for the global pool, and the
+device table. Each file descriptor controls a local list of buffers,
+and possibly a buffer for pushed back data. A buffer descriptor
+structure is associated with each file buffer.
+
+
+
+.ks
+.nf
+# Static part of file descriptor structures
+
+common fiocom {
+ int gnbufs # size of global pool
+ int gbufsize # size of global buffer
+ int gnref # number of files using gpool
+ struct bufdes *ghead # head of the global list
+ struct bufdes *gtail # tail of the global list
+ int ndev # number of devices
+ int zdev[SZ_DEVTBL] # device table
+ char *iop[MAXFD] # i/o pointer
+ char *itop[MAXFD] # itop for current buffer
+ char *otop[MAXFD] # otop for current buffer
+ char *bufptr[MAXFD] # pointer to current buffer
+ long offset[MAXFD] # offset of the current buffer
+ struct fiodes *fdes[MAXFD] # pointer to rest of fd
+ char osfname[SZ_OSFNAME] # buffer for OS file names
+}
+.fi
+.ke
+
+
+.ks
+.nf
+# Template for dynamically allocated part of file descriptor
+
+struct fiodes {
+ char fname[SZ_FNAME] # file name string
+ int fmode # mode of access
+ int ftype # type of file
+ int fchan # OS file number (channel)
+ int fdev # index into device table
+ int bufsize # size of a file buffer
+ int pbbsize # size of pushback buffer
+ int nbufs # number of local buffers
+ int fflags # flag bits
+ int nchars # size of last transfer
+ int iomode # set if i/o in progress
+ int errcode # error code
+ long fpos # actual file position
+ char *pbbp # pointer to pushback buffer
+ char *pbsp # pushback stack pointer
+ char *pbsp0 # pointer to stack elem 0
+ struct bufdes *iobuf # buffer i/o being done on
+ struct bufdes *lhead # head of local list
+ struct bufdes *ltail # tail of local list
+}
+.fi
+.ke
+
+
+.nf
+# flags (saved in fdes[fd].fflags)
+
+ F_ASYNC # enable async_io
+ F_EOF # true if at EOF
+ F_ERR # set when error occurs
+ F_FLUSHNL # flush after newline
+ F_GLOBAL # local or global buffers
+ F_RANDOM # optimize for rand. access
+ F_READ # read perm on file
+ F_SEQUENTIAL # optimize for seq. access
+ F_WRITE # write perm on file
+.fi
+
+
+
+.ks
+.nf
+# Buffer descriptor structure.
+
+struct bufdes {
+ int b_fd # fd to which buffer belongs
+ int b_iomode # set when i/o in progress
+ int b_bufsize # size of buffer, chars
+ long b_offset # offset of buffer in file
+ char *b_itop # saved itop
+ char *b_otop # saved otop
+ char *b_bufptr # pointer to start of buffer
+ struct bufdes *luplnk # next buffer up, local list
+ struct bufdes *ldnlnk # next buffer down, local list
+ struct bufdes *guplnk # next buffer up, global list
+ struct bufdes *gdnlnk # next buffer down, global list
+}
+.fi
+.ke
+
+
+.sh
+SEMICODE FOR THE FIO DATABASE ACCESS PROCEDURES
+
+ Routines are required to allocate and deallocate buffers,
+and to link and unlink buffers from the buffer lists. Now that the
+data structures have been more clearly defined, we shall go into a
+little more detail in the semicode.
+
+
+.ks
+.nf
+ fmkbfs
+
+
+
+ fmklst
+
+
+
+ flnkhd fmkbuf flnktl
+
+
+
+ malloc
+
+
+
+ Structure of the Buffer Allocation Procedures
+.fi
+.ke
+
+
+
+The main buffer creation procedure, FMKBFS, is called by either
+FILBUF or FLSBUF when i/o is first done on a file. FMKLST allocates
+a set of buffers and links them into a doubly linked list. FLNKHD
+links a buffer at the head of a list, while FLNKTL links a buffer at
+the tail of a list. FMKBUF calls MALLOC to allocate memory for a file
+buffer, and initializes the descriptor for the buffer.
+
+
+
+
+.tp 5
+.nf
+procedure fmkbfs (fd)
+
+fd: file descriptor number
+fp: pointer to file descriptor
+bp: pointer to buffer descriptor
+
+begin
+ if (use global pool) {
+ if (no buffers in global pool yet) {
+ gnbufs = fmklst (NULL, gnbufs, gbufsize, GLOBAL)
+ if (gnbufs <= 0) # can't make buffers
+ take error action
+ }
+ gnref = gnref + 1
+
+ } else { # create local buffers
+ adjust bufsize to be an integral number of device blocks
+ fp = fdes[fd]
+ fp.nbufs = fmklst (fd, fp.nbufs, bufsize, LOCAL)
+
+ if (fp.nbufs == 0) # must be at least one
+ take error action
+ }
+end
+
+
+
+.fi
+Unlink a buffer from whatever lists it is on, relink it at head of the
+local list, and also at head of global list if a mapped file. Called
+by FFAULT.
+.nf
+
+
+.tp 5
+procedure frelnk (fd, bp)
+
+fd: file descriptor number
+bp: pointer to buffer descriptor
+
+begin
+ # relink buffer at head of the local list for file fd
+ call funlnk (bp, LOCAL)
+ call flnkhd (fd, bp, LOCAL)
+
+ # relink at head of global list, if buffer in global pool
+ if (buffer is linked into the global pool) {
+ call funlnk (bp, GLOBAL)
+ call flnkhd (fd, bp, GLOBAL)
+ }
+end
+
+
+
+
+.tp 5
+int procedure fmklst (fd, nbufs, bufsize, list) # make list
+
+list: either global or local
+bufdes: pointer to buffer descriptor
+
+begin
+ for (nb=0; nb <= nbufs; nb=nb+1) {
+ bufdes = fmkbuf (fd, bufsize)
+ if (bufdes == NULL)
+ break
+ else if (nb == 1)
+ flnkhd (fd, bufdes, list)
+ flnktl (fd, bufdes, list)
+ }
+ return (nb)
+end
+
+
+
+
+.tp 5
+int procedure fmkbuf (fd, bufsize) # make a buffer
+
+begin
+ assert (bufsize > 0 && mod (bufsize, block_size) == 0)
+
+ sizeof_buffer = sizeof (struct bufdes) + bufsize
+ bufdes_pointer = malloc (sizeof_buffer, TY_CHAR)
+ if (bufdes_pointer == NULL)
+ return (NULL)
+ else {
+ initialize buffer descriptor
+ return (bufdes_pointer)
+ }
+end
+
+
+
+
+.tp 5
+procedure flnkhd (fd, bp, list) # link buf at head of list
+
+fd: file descriptor number
+bp: pointer to buffer descriptor
+list: global or local
+fp: pointer to file descriptor
+
+begin
+ assert (bp != NULL)
+ assert (list == LOCAL || list == GLOBAL)
+
+ switch (list) {
+ case GLOBAL:
+ if (buffer not already linked at head of list) {
+ bp.gdnlnk = ghead
+ ghead.guplnk = bp
+ ghead = bp
+ }
+ case LOCAL:
+ fp = fdes[fd]
+ if (buffer not already linked at head of list) {
+ bp.fd = fd
+ bp.ldnlnk = fp.lhead
+ if (fp.lhead != NULL)
+ fp.lhead.luplnk = bp
+ fp.lhead = bp
+ }
+ }
+end
+
+
+
+
+.tp 5
+procedure flnktl (fd, bp, list) # link buf at tail of list
+
+fd: file descriptor number
+bp: pointer to buffer descriptor
+list: global or local
+fp: pointer to file descriptor
+
+begin
+ assert (bp != NULL)
+ assert (list == LOCAL || list == GLOBAL)
+
+ switch (list) {
+ case GLOBAL:
+ if (buffer not already linked at tail of list) {
+ bp.guplnk = gtail
+ gtail.gdnlnk = bp
+ gtail = bp
+ }
+ case LOCAL:
+ fp = fdes[fd]
+ if (buffer not already linked at tail of list) {
+ bp.fd = fd
+ bp.luplnk = fp.ltail
+ if (fp.ltail != NULL)
+ fp.ltail.ldnlnk = bp
+ fp.ltail = bp
+ }
+ }
+end
+
+
+
+
+.tp 5
+procedure flnkto (fd, bp, to) # link buf bp after to
+
+bp: pointer to descriptor of buffer to be linked
+to: pointer to descriptor of buffer to be linked to
+
+begin
+ bp.ldnlnk = to.ldnlnk
+ bp.luplnk = to
+ to.ldnlnk = bp
+ if (bp.ldnlnk == NULL)
+ fdes[fd].ltail = bp # new tail of list
+ else
+ bp.ldnlnk.luplnk = bp
+end
+
+
+
+
+.tp 5
+procedure funlnk (bp, list) # unlink from list
+
+bp: pointer to buffer descriptor
+list: global or local
+fp: pointer to file descriptor
+
+begin
+ switch (list) {
+
+ case GLOBAL:
+ if (buffer is at head of the global list)
+ ghead = bp.gdnlnk
+ if (buffer is at tail of the global list)
+ gtail = bp.guplnk
+ if (bp.guplnk != NULL)
+ bp.guplnk.gdnlnk = bp.gdnlnk
+ if (bp.gdnlnk != NULL)
+ bp.gdnlnk.guplnk = bp.guplnk
+
+ case LOCAL:
+ fp = fdes[bp.fd]
+ if (buffer is at head of the local list)
+ fp.lhead = bp.ldnlnk
+ if (buffer is at tail of the local list)
+ fp.ltail = bp.luplnk
+ if (bp.luplnk != NULL)
+ bp.luplnk.ldnlnk = bp.ldnlnk
+ if (bp.ldnlnk != NULL)
+ bp.ldnlnk.luplnk = bp.luplnk
+ }
+end
+.fi
+
+
+.sh
+SEMICODE FOR FFAULT, AGAIN
+
+ The file fault procedure lies at the heart of FIO. Now that the
+data structures, initialization procedures, and linked list operators are
+clearer, it is time to go back and fill in some of the details in FFAULT.
+
+
+
+.tp 5
+.nf
+int procedure ffault (fd, char_offset)
+
+fd: file descriptor number
+char_offset: desired char offset in file
+bp: pointer to a buffer descriptor
+fp: pointer to the file descriptor
+
+begin
+ # calculate buffer_offset (modulus file buffer size)
+ buffer_offset = char_offset - mod(char_offset, buffer_size) + 1
+
+ # compute pointers to fd structure, current buffer
+ fp = fdes[fd]
+ bp = fp.lhead
+
+ # update i/o pointers in the buffer descriptor
+ # note writes may have pushed iop beyond original itop
+ itop[fd] = max(itop[fd], iop[fd])
+ if (bp != NULL) {
+ bp.b_itop = itop[fd]
+ bp.b_otop = otop[fd]
+ }
+
+ # if buffer is found in local pool, relink at head of list.
+ if (ffndbf (fd, buffer_offset, bp) == YES) {
+ frelnk (fd, bp)
+ itop[fd] = bp.b_itop
+ otop[fd] = bp.b_otop
+
+ # this next section of code is invoked whenever a fault
+ # occurs which requires an actual i/o transfer.
+
+ } else {
+ if (bp.b_otop != bp.b_bufptr) # buffer dirty?
+ fflsbf (fd, bp) # flush buffer
+
+ frelnk (fd, bp) # relink at head
+ bp.b_offset = buffer_offset
+
+ if (F_READ flag is set) {
+ ffilbf (fd, bp) # fill buffer
+ fwatio (fd)
+ } else {
+ bp.b_itop = bp.b_bufptr
+ bp.b_otop = bp.b_bufptr
+ }
+
+ # if asynchronous i/o is enabled (only if two or more
+ # buffers) initiate write behind or read ahead, if
+ # fwatio has detected a sequential pattern of i/o.
+
+ if (ASYNC_IO enabled)
+ switch (io_pattern) {
+ case WSEQ: # write behind
+ bufp = bp.ldnlnk
+ if (bufp != NULL)
+ if (bufp.b_otop != bufp.b_bufptr)
+ fflsbf (fd, bufp)
+ case RSEQ: # read ahead
+ new_buffer_offset = buffer_offset + buffer_size
+ if (ffndbf (fd, new_buffer_offset, bufp) == YES)
+ # skip read ahead, buffer already in pool
+ else if (bufp.b_otop == bufp.b_bufptr) {
+ if (bufp.luplnk != fp.lhead) {
+ funlnk (bufp, LOCAL)
+ flnkto (fd, bufp, fp.lhead)
+ }
+ if (buffer in global pool) {
+ funlnk (bufp, GLOBAL)
+ flnkhd (fd, bufp, GLOBAL)
+ }
+ bufp.b_offset = new_buffer_offset
+ ffilbf (fd, bufp)
+ }
+ }
+ }
+
+ bufptr[fd] = bp.b_bufptr # set i/o pointers
+ offset[fd] = buffer_offset
+ lseek (fd, char_offset)
+
+ if (fp.status == ERR) # check for ERR,EOF
+ return (ERR)
+ else if (iop[fd] == itop[fd])
+ return (EOF)
+ else
+ return (itop[fd] - iop[fd]) # return nchars
+end
+
+
+
+
+# Search for a file buffer. If found, return buffer pointer in BP,
+# otherwise allocate a buffer from the tail of either the global or
+# local list.
+
+
+.tp 5
+int procedure ffndbf (fd, buffer_offset, bp)
+
+begin
+ # desired buffer may be on the way; wait and see
+ if (read in progress on file fd)
+ fwatio (fd)
+
+ # search local pool for the buffer
+ for (bp = fp.lhead; bp != NULL; bp = bp.ldnlnk)
+ if (bp.b_offset == buffer_offset)
+ break
+
+ # if buffer already in pool, return buffer pointer,
+ # otherwise use oldest buffer in appropriate list.
+
+ if (bp != NULL) # buffer found in pool
+ return (YES)
+ else { # use buffer at tail of list
+ if (this file uses global pool) {
+ bp = gtail
+ if (io in progress on this buffer)
+ fwatio (bp.fd)
+ } else
+ bp = fp.ltail
+ return (NO)
+ }
+
+end
+.fi
+
+
+.sh
+SUMMARY OF THE FIO/OS INTERFACE (MACHINE DEPENDENT PRIMITIVES)
+
+ FIO depends on a number of machine dependent primitives. Many of these
+have been introduced in the semicode. Other primitives are not involved in
+i/o, and hence have not appeared thus far in the discussion. Primitives are
+required to map virtual file names into OS file names.
+
+The goal in designing the FIO/OS interface was to make the primitives as
+"primitive" as feasible, rather than to minimize the number of primitives.
+These primitives should be easy to implement on almost any modern minicomputer.
+The ideal target OS will provide asynchronous, random access i/o,
+logical name facilities, multiple directories per task, multitasking and
+intertask communication facilities, and dynamic memory allocation/deallocation
+facilities.
+
+
+.nf
+Text Files
+
+ zopntx (osfn, access_mode; chan)
+
+ zgettx (chan, line_buf, maxchars; nchars)
+ zputtx (chan, line_buf, nchars; nchars)
+ zflstx (chan)
+ zfsttx (chan, what; status_info)
+ zclstx (chan)
+ zsektx (chan, znotln_offset; status)
+ znottx (chan; file_offset)
+
+
+Binary File Initialization (one set per device)
+
+ zopnbf (osfn, access_mode; chan)
+ zfaloc (osfn, nchars; chan)
+
+
+Binary File I/O primitives (one set per device)
+
+ zaread (chan, buffer, maxchars, file_offset)
+ zawrit (chan, buffer, maxchars, file_offset)
+ zawait (chan; status)
+ zfsttb (chan, what; status_info)
+ zclsbf (chan; status)
+
+ standard devices: regular files, inter-task pipes (CL,GIO),
+ memory, magnetic tapes.
+
+
+Virtual File Name Mapping
+
+ zmapfn (vfn, osfn, maxch)
+ zabsfn (vfn, osfn, maxch)
+
+
+File Manipulation, Status, File Protection, Temporary Files
+
+ zacces (osfn, mode, type; status)
+ zfdele (osfn; status)
+ zrenam (from_osfn, to_osfn; status)
+ zfprot (osfn)
+ zmktmp (root, temp_file_osfn)
+
+
+Other Dependencies (also used outside of FIO)
+
+ zcallN (entry_point, arg1, ..., argN)
+ pntr = malloc (nelements, data_type)
+ pntr = calloc (nelements, data_type)
+ mfree (pntr, data_type)
+ int = and (int, int)
+ int = or (int, int)
+ int = loc (reference)
+.fi
+
+
+The STATUS returned by the Z-routines may be ERR or a meaningful number,
+such as the channel number or number of characters read or written.
+EOF is signified at this level by a return value of zero for the number
+of characters read (only ZGETTX and ZAREAD read from a file). There is
+no provision for special error codes or messages at the Z-routine level.
diff --git a/sys/fio/doc/fio.men b/sys/fio/doc/fio.men
new file mode 100644
index 00000000..a0b2ddb6
--- /dev/null
+++ b/sys/fio/doc/fio.men
@@ -0,0 +1,50 @@
+ access - Determine the type or accessibility of a file
+ aread - Asynchronous read from a binary file
+ areadb - Asynchronous read from a binary file in byte units
+ await - Wait for an asynchronous i/o transfer to complete
+ awaitb - Wait for i/o, and return status in byte units
+ awrite - Asynchronous write to a binary file
+ awriteb - Asynchronous write to a binary file in byte units
+ close - Close a file
+ delete - Delete a file
+ diropen - Open a directory as a text file
+ falloc - Preallocate (uninitialized) storage for a file
+ fcopy - Copy a file
+ fdevbf - Install a new binary device in the FIO device table
+ fdevtx - Install a new text device in the FIO device table
+ finfo - Get directory information for a file
+ flush - Flush any buffered output to a file
+ fnextn - Extract the extension field of a filename
+ fnldir - Extract the logical directory field of a filename
+ fnroot - Extract the root field of a filename
+ fntcls - Close unbuffered list
+ fntclsb - Close buffered list
+ fntgfn - Get next filename from unbuffered list
+ fntgfnb - Get next filename from buffered list
+ fntlenb - Get number of filenames in a buffered list
+ fntopn - Open an unbuffered filename list
+ fntopnb - Expand template and open a buffered filename list
+ fntrewb - Rewind the list
+ fopnbf - Open a binary file on a special device
+ fopntx - Open a text file on a special device
+ fowner - Get the name of the owner of a file
+ fpathname - Get the full pathname of a file
+ fseti - Set an integer FIO parameter
+ fstati - Get the value of an integer FIO parameter
+ fstatl - Get the value of a long integer FIO parameter
+ fstats - Get the value of a string valued FIO parameter
+ getc - Get the next character from a file
+ getline - Get the next line from a text file
+ mktemp - Make a unique temporary filename
+ note - Note the long integer position in a file for a later seek
+ open - Open or create a text or binary file
+ protect - Protect a file from deletion
+ putc - Put a character to a file
+ putcc - Put only printable characters to a file
+ putline - Put a line to a text file
+ read - Read a binary block of data from a file
+ rename - Change the name of a file
+ reopen - Reopen a file on another file descriptor
+ seek - Set the file offset of the next char to be read or written
+ stropen - Open a character string as a file
+ write - Write a binary block of data to a file
diff --git a/sys/fio/doc/vfn.hlp b/sys/fio/doc/vfn.hlp
new file mode 100644
index 00000000..d6c20e8b
--- /dev/null
+++ b/sys/fio/doc/vfn.hlp
@@ -0,0 +1,1028 @@
+.help vfn Jul84 "Virtual Filename Mapping"
+.ce
+\fBVirtual Filename Mapping Package\fR
+.ce
+Detailed Design
+.ce
+Doug Tody
+.ce
+July 1984
+.sp 2
+.NH
+Introduction
+
+ This document presents the detailed design of the filename mapping
+code, used by FIO to map virtual filenames (VFN's) to host operating system
+filenames (OSFN's) and back again. A description of the filename mapping
+algorithm is given in \fIThe Reference Manual for the IRAF System Interface\fR,
+May 1984. The purpose of this document is more to design the software than
+to document the design, hence much is omitted. The discussion concentrates
+on those aspects of the problem which were least-understood at the time of
+the design.
+
+.sh
+Primary Functions
+
+.nf
+ map vfn->osfn
+ map osfn->vfn
+.fi
+
+.sh
+Functions for accessing the vfnmap file
+
+.nf
+ open and optionally lock vfnmap file
+ close and unlock vfnmap file
+
+ add entry to vfnmap
+ delete entry from vfnmap
+ lookup entry in vfnmap
+.fi
+
+.sh
+Mapping Functions
+
+.nf
+ extract OSDIR prefix
+ extract LDIR prefix
+ expand LDIR
+ fold subdir into OSDIR
+ encode filename via escape sequence encoding
+ decode encoded filename
+ squeeze filename
+ map filename extension
+.fi
+
+
+.nh
+VFN Virtual Filename Mapping Package
+
+ The VFN package is used to map and unmap virtual filenames and to add and
+delete virtual filenames from the VFN database. A distinct open operation is
+required for each vfn to be accessed. Any number of vfn's may be simultaneously
+open for reading, but only \fIone\fR vfn may be open for writing at a time.
+The mapping file is not physically opened unless the escape sequence encoded
+filename is degenerate. It is intended that the vfn will be opened for only
+a brief period of time to minimize the amount of time that the mapping file
+is locked. The mapping file is locked only if the vfn is degenerate and the
+access mode is VFN_WRITE. The recognized vfn access modes are VFN_READ,
+VFN_WRITE, and VFN_UNMAP (for reading directories).
+
+
+.ks
+.nf
+ vp = vfnopen (vfn, mode)
+ vfnclose (vp, update)
+ stat = vfnmap (vp, osfn)
+ stat = vfnadd (vp, osfn)
+ stat = vfndel (vp, osfn)
+ stat = vfnunmap (vp, osfn, vfn)
+
+ stat = fmapfn (vfn, osfn) [=:vfnopen/RO,vfnmap,vfnclose]
+.fi
+.ke
+
+
+A distinction is made between mapping the filename and opening and closing
+the vfn to permit efficient and secure error recovery. The mapping file is
+not updated on disk until the physical file operation (create, delete, etc)
+has succeeded. If the operation fails \fBvfnclose\fR is called with NO_UPDATE
+and the mapping file is not touched. If the vfn was opened VFN_READ the
+update flag is ignored. No vfn disk data structures will be modified
+if a vfn is closed with NO_UPDATE set. If updating is enabled, ".zmd"
+dependency files may be created or deleted, the mapping file may be created,
+deleted, or updated.
+
+The procedure \fBvfnmap\fR returns ERR if the vfn is degenerate but no entry
+could be found in the mapping file, i.e., if the file does not exist.
+A status value of OK does not, however, imply that the file exists.
+\fBVfnadd\fR returns ERR if the vfn is degenerate and an entry already
+exists in the mapping file. If the status return is OK and the vfn is
+degenerate then a new entry has been added to the mapping file.
+\fBVfndel\fR returns ERR if the vfn is degenerate but no entry
+could be found in the mapping file. \fIOsfn\fR is returned as a packed string.
+The output buffer should be dimensioned SZ_PATHNAME.
+
+.nh
+Semicode for Selected FIO Procedures
+
+ The RO class procedures call FMAPFN to map the VFN of an existing file
+into an OSFN. These operations are straightforward since the vfn database
+is not affected.
+
+.ks
+.nf
+ access, fchdir, finfo, fpath, fprot: RO operations
+ falloc, open/NF, fmkcopy: RW=ADD procedures
+ delete RW=DEL procedure
+ rename RW=DEL+ADD
+.fi
+.ke
+
+
+.nf
+# FALLOC -- Create a new file and allocate uninitialized storage. Open/NF and
+# make copy are similar operations hence the semicode is not shown.
+
+procedure falloc (vfn, size)
+
+begin
+ # Map filename and determine if a file already exists with the
+ # same name.
+ vp = vfnopen (vfn, VFN_WRITE) # LOCK
+ if (vfnadd (vp, osfn) == ERR)
+ existing_file = yes
+ else {
+ call zfacss to see if file exists
+ existing_file = yes if file exists
+ }
+
+ # If file exists and clobber is enabled, try to delete the file.
+ # If filename is degenerate, entry is either already in mapping file
+ # (if file exists), or has been added.
+
+ if (existing_file)
+ iferr {
+ if (file clobber enabled)
+ delete file
+ else
+ error ("falloc would clobber file 'vfn'")
+ } then {
+ vfnclose (vp, NO_UPDATE)
+ erract (EA_ERROR)
+ }
+
+ # Allocate the new file and update the filename mapping database.
+
+ call ZFALOC to allocate the file
+ if (failure) {
+ vfnclose (vp, NO_UPDATE)
+ error ("cannot allocate file 'vfn'")
+ } else
+ vfnclose (vp, UPDATE) # UNLOCK
+end
+
+
+# DELETE -- Delete a file and all subfiles. A subfile is a file which is
+# logically part of the parent file but which is physically a separate file
+# at the kernel level. An example is the pixel storage file associated with
+# an image. Whenever a file is deleted all subfiles must be deleted as well.
+# The subfiles need not reside in the same directory as the main file.
+# Subfile information is maintained in a separate, "invisible" file for each
+# file having subfiles. The subfile list file has the same vfn as the main
+# file with the extension ".sfl" appended. If the vfn already had an extension
+# it is retained in the root of the new filename. For example, the vfn of the
+# subfile list file for "data.db" would be "data.db.sfl".
+
+procedure delete (vfn)
+
+begin
+ # Delete the main file
+ fdelpf (vfn)
+
+ # Delete any subfiles. Print warning message if a subfile appears
+ # in the list but cannot be deleted.
+
+ ifnoerr (fd = fsf_open (vfn, READ_ONLY)) {
+ while (getline (fd, subfilename, SZ_FNAME) != EOF)
+ iferr (fdelpf (subfilename))
+ call erract (EA_WARN)
+ close (fd)
+ }
+end
+
+
+# FDELPF -- Delete a single physical file. Check if the file is protected
+# and do not try to delete the file if it is protected. If file cannot be
+# deleted, determine why and print appropriate error message, and do not update
+# the mapping file.
+
+procedure fdelpf (vfn)
+
+begin
+ vp = vfnopen (vfn, VFN_WRITE) # LOCK
+ if (vfndel (vp, osfn) == ERR) {
+ vfnclose (vp, NO_UPDATE)
+ error ("attempt to delete a nonexistent file (vfn)")
+ }
+
+ call ZFPROT to check for file protection
+ if (file is protected) {
+ vfnclose (vp, NO_UPDATE)
+ error ("attempt to delete a protected file (vfn)")
+ }
+
+ call ZFDELE to delete the file
+ if (failure) {
+ vfnclose (vp, NO_UPDATE)
+ call ZFACSS to determine if file exists
+ if (no such file)
+ error ("attempt to delete a nonexistent file (vfn)")
+ else
+ error ("cannot delete file 'vfn'")
+ }
+
+ vfnclose (vp, UPDATE) # UNLOCK
+end
+
+
+# RENAME -- Rename a file. A file may be renamed within a single directory
+# or may be moved to another directory by the rename operation. Note that
+# we may only have one VFN opened for writing at a time.
+
+procedure rename (oldvfn, newvfn)
+
+begin
+ # Delete old filename from VFN database.
+ vp = vfnopen (oldvfn, VFN_WRITE)
+ if (vfndel (vp, oldosfn) == ERR) {
+ vfnclose (vp, NO_UPDATE)
+ error ("attempt to rename a nonexistent file (vfn)")
+ } else
+ vfnclose (vp, UPDATE)
+
+ # Add new filename to VFN database.
+ vp = vfnopen (newvfn, VFN_WRITE)
+ if (vfnadd (vp, newosfn) == ERR) {
+ vfnclose (vp, NO_UPDATE)
+ error ("cannot create new file 'vfn'")
+ } else
+ vfnclose (vp, UPDATE)
+
+ # Rename the physical file.
+ call ZFRNAM to rename the file
+
+ # Patch up VFN database if the rename operation fails. If the rename
+ # fails then most likely the OSFN's were short and no mapping file
+ # access was involved (else we would have had an abort above), but
+ # then the calls cost almost nothing so make them anyhow.
+
+ if (rename fails) {
+ # Restore old filename.
+ vp = vfnopen (oldvfn, VFN_WRITE)
+ vfnadd (vp, oldosfn)
+ vfnclose (vp, UPDATE)
+
+ # Delete new filename.
+ vp = vfnopen (newvfn, VFN_WRITE)
+ vfndel (vp, newosfn)
+ vfnclose (vp, UPDATE)
+
+ error ("cannot rename file (oldvfn -> newvfn)")
+ }
+end
+.fi
+
+.nh
+Locking and Concurrency Considerations
+
+ A locking mechanism is necessary to prevent two or more processes from
+simultaneously modifying a mapping file. The dimensions of the problem are
+as follows:
+
+.ls
+.ls [1]
+Mutual exclusion must be guaranteed. The period of time during which a process
+opens and reads the mapping file, modifies it, and updates the file on disk
+is the critical section. The locking protocol must guarantee that only one
+process can be in the critical section at a time. A read-only access of the
+mapping file is not a critical section, but we must guarantee that the file
+is not in the process of being written when such a read occurs.
+.le
+.ls [2]
+Deadlock must either be prevented or it must be detected and broken.
+Deadlock will eventually occur if a process is permitted to simultaneously
+access more than one mapping file. Deadlock will occur if process A locks
+directory D1 and process B locks D2, then B tries to lock D1 and A tries to
+lock D2.
+.le
+.ls [3]
+Lockout will occur if a process dies while in the critical section, thus
+failing to remove the lock.
+.le
+.le
+
+
+On a system which provides file locking, i.e., which forbids a process
+access to a file which is open with write permission by another process,
+the host OS guarantees mutual exclusion and protection from lockout.
+Unfortunately many UNIX systems (and probably some other systems as well)
+do not provide file locking. The scheme discussed in this section is
+awkward but provides secure locking on such systems. The file locking
+facilities discussed herein are designed to make use of host system file
+locking if available. The discussion is oriented towards the problems
+of providing locking on systems which do not provide locking at the kernel
+level, i.e., in \fBzopnbf\fR.
+
+.nh 2
+Mutual Exclusion
+
+ Mutual exclusion can be guaranteed by use of a \fBsemaphore\fR.
+The transportability requirement makes it very difficult to implement a
+general semaphore, but a binary semaphore is possible using a null length
+file in the same directory as the mapping file. To implement a semaphore
+we must test and set the lock all in the same operation, to prevent
+interleaving of the operations by two processes simultaneously trying to
+set a lock (i.e., process A tests for a lock and finds none, B tests for a
+lock and finds none, A sets a lock, B sets a lock, and mutual exclusion is
+violated).
+
+A suitable binary semaphore can be implemented by \fIdeleting\fR the lock
+file to set the lock, rather than by testing for the lock (no lock file)
+and then creating the lock file to set the lock. We assume that the delete
+operation will return error for an attempt to delete a nonexistent file.
+Thus if the lock file can be successfully deleted, the lock has been tested
+and found to be absent and the directory has been locked, all in one
+indivisible kernel operation.
+
+
+.ks
+.nf
+ # Gain exclusive access to a file. The file must have an
+ # associated lockfile which is deleted while a process has
+ # the file locked.
+
+ while (delete (lockfile) == ERR)
+ ;
+
+
+ # Give up exclusive access to a file.
+ create (lockfile)
+.fi
+.ke
+
+
+The above is a bit simplistic because the file itself may not exist,
+in which case there will be no lockfile, and the process may not have
+delete permission for the lockfile if there is one. The point here is
+that the OS kernel guarantees that only one process will be allowed
+to successfully delete the lockfile, hence the deletion operation can
+serve to gain exclusive access to a file. The problem of lockout, wherein
+the lockfile gets lost, is dealt with later.
+
+Locking the directory is necessary whenever the mapping file is to be modified.
+While it is not necessary to lock the directory to read the mapping file,
+by not doing so we run the risk of trying to read while the file is being
+written to (permissible on some systems, an error condition on others).
+The simplest solution to this problem is to lock the file for all accesses,
+including reads as well as writes. The problem with this approach is that
+it precludes read access on directories for which a process does not have
+write permission (preventing generation of the lock file). This is not
+acceptable. Our solution is to include a \fBchecksum\fR in the mapping file.
+If the file exists but cannot be opened for reading and a lock exists on the
+directory, we will wait until the lock is lifted to read the file. If the
+checksum is in error the read will be repeated until a valid checksum is
+obtained.
+
+.nh 2
+Deadlock
+
+ Deadlock can be avoided by the simple expedient of permitting a process to
+lock only one directory at a time. The only time a process needs to lock
+more than one directory is when renaming a file with a long, degenerate name
+from one directory to another. Deadlock is unlikely but would certainly
+occur at infrequent intervals. Locking only one directory at a time is
+inefficient (because separate references are needed to map the filename
+and to edit the mapping file), but it does not matter since lock file
+accesses are expected to be infrequent (few mapped filenames are degenerate).
+Detection of and breaking of deadlock is possible but not worth the trouble.
+Thus we shall avoid the problem of deadlock entirely by permitting a process
+to lock only a single directory at a time, for only a brief period of time.
+
+.nh 2
+Lockout
+
+ At this point we have a solution which guarantees mutual exclusion and the
+avoidance of deadlock nearly 100% of the time. The only problem remaining
+is \fBlockout\fR. It is not possible to prevent lockout since we cannot
+guarantee that a process (or the computer) will not die while in a critical
+section, preventing removal of the lock.
+
+The obvious way to implement automatic recovery from lockout is to add a
+provision for timeout. While we cannot guarantee that the time spent
+in a critical section will be less than some absolute amount (because of
+variable load conditions, swapping, the time required to delete a very
+large file, etc.), we can say that the time spent in a critical section will
+rarely be larger than some number on the order of one second. In a worst
+case situation where several processes are heavily accessing a directory
+it could take an arbitrarily long time for a particular process to gain a
+lock on the directory, but this is very unlikely.
+
+If a process times out while waiting we must either abort or proceed to break
+the lock. This may be done by creating a new lockfile as if the transaction
+had been completed. There is a hidden bug, however -- if two or
+more processes timeout simultaneously, the following scenario might occur:
+
+
+.kf
+.nf
+ A times out
+ B times out
+ A breaks the lock
+ A enters wait loop and places a new lock,
+ entering the critical section
+ B breaks the lock set by A
+ B enters wait loop and places a new lock,
+ entering the critical section
+ [...mutual exclusion is violated...]
+.fi
+.ke
+
+
+No matter how unlikely this scenario might be, it prevents us from using the
+simple technique to break the lock. Breaking the lock appears to be another
+critical section, so perhaps we can use another semaphore to protect the lock
+(we ignore the complications of checking for write permission on the directory,
+which should be dealt with when the lock is set).
+
+Even if a semaphore is used, concurrency
+can still be a problem, as another process may timeout and break the lock
+shortly after the first process has done so; this can happen because the
+section between timeout and the test for permission to break the lock is
+interruptible. To get around this we apply an additional constraint
+that the lock can only be broken if it has been in place for a specified
+interval of time which is much larger than the timeout interval. This suffices
+to recover from a process crash and prevents two processes from breaking
+the lock at almost the same time.
+
+
+.ks
+.nf
+ # Try to set a lock on the directory. If we timeout, try to get
+ # permission to break the lock; only one process is permitted to
+ # break the lock, and the lock can only be broken once in a
+ # specified interval of time. The timelock files are normally
+ # created whenever the directory is locked.
+
+ repeat {
+ while (delete (lockfile) == ERR)
+ if (timeout)
+ if (delete (timelock1) != ERR) {
+ get creation date of timelock2
+ if (timelock2 is an old file) {
+ create (lockfile)
+ delete (timelock2); create (timelock2)
+ create (timelock1)
+ } else
+ create (timelock1)
+ }
+ } until (lock is established)
+
+ # Back to normal.
+ carry out transaction
+ create (lockfile)
+.fi
+.ke
+
+
+Lockout is still possible if the process or the computer dies in the interval
+between deletion and creation of timelock1, but the chances of that happening
+are very remote because the interval is short and it only occurs during
+recovery from lockout. An additional check should perhaps be provided to
+detect this unlikely circumstance and break the lock without further ado
+if timelock1 somehow gets permanently deleted. The mapping file can be
+checkpointed when this occurs to minimize the risk.
+
+.nh 2
+Rollback
+
+ Unfortunately, automatic lockout detection and recovery brings with it
+the possibility that the lock will be broken when a process takes an abnormally
+long time to complete a transaction. This might happen when a heavily loaded
+system has begun swapping processes, or when a background job with a
+very low priority accesses a directory. We must be able to detect that the
+lock has been broken and \fIrollback\fR the transaction, i.e., obtain a new
+lock and try again, repeating the unsuccessful transaction.
+
+Timeouts leading to improper breaking of the lock are not a problem if the
+host system provides file locking for files opened for writing. After placing
+the lock on a directory a process will open the mapping file with readwrite
+permission and all other processes will be locked out until the transaction
+completes. Unfortunately file locking is not provided on all systems (e.g.,
+many versions of UNIX do not provide file locking).
+
+Secure protection from a broken lock is difficult because if we check that
+the lock is still in place and then perform the update, another process may
+break the lock immediately after we check that the lock is in place and
+before the update occurs. About the best we can do is check the creation time
+on timelock2 immediately before updating, updating only if the timelock has
+not been touched since we created it at lock time. If the lock has been
+broken our timelock file will have been deleted and the transaction must be
+rolled back. If a lot of time remains on the lock we go ahead and perform
+the update, otherwise a new timelock2 is written, providing a time equal to
+the minimum lifetime of a lock in which to update the file.
+
+
+.ks
+.nf
+ perform transaction upon MFD (in memory)
+
+ # Determine if the lock is still in place and likely to remain
+ # in place until the update is completed.
+
+ repeat {
+ get creation date of timelock2
+ if (not the timelock we set at vfn_wait time)
+ rollback transaction
+ else if (not much time left on lock)
+ rollback transaction
+ else
+ break
+ }
+
+ # Update and remove the lock.
+
+ update the mapping file
+ close (mapping file)
+
+ get creation date of timelock2
+ if (not our timelock)
+ bad news: warn user
+
+ create (lockfile)
+.fi
+.ke
+
+.nh 2
+File Locking Facilities
+
+ From the above code fragments it appears that the lockfile approach
+to file locking will work on any machine on which it is an error to delete
+a nonexistent file. The next step is to encapsulate all this in file
+locking primitives which will use the host OS file locking facilities if
+any, otherwise the lockfile techniques we have developed. A set of file
+locking primitives is presented below. These are low level routines
+with fairly restrictive semantics, and are not intended to be used in other
+than system code.
+
+
+.ks
+.nf
+ time = osfn_lock (osfn)
+ nsec = osfn_timeleft (osfn, time)
+ nsec = osfn_unlock (osfn, time)
+.fi
+.ke
+
+
+A file is locked with the \fBosfn_lock\fR primitive, which returns when
+it has successfully placed a lock on the file \fIosfn\fR. The lock is
+guaranteed to remain in place for at least \fItimeout\fR seconds, where
+\fItimeout\fR is a system constant.
+On some systems the file may not actually be locked until it is opened
+with write access. If the file does not exist or cannot be locked
+\fBerror\fR is called. If the file is already locked but the lock has
+expired \fBosfn_lock\fR will break the old lock and return when it has
+set a new one.
+
+The primitive \fBosfn_timeleft\fR returns the number of seconds remaining
+on the lock on file \fIosfn\fR. ERR is returned if the file is no longer
+locked or if the file is currently locked by another user.
+
+A lock is removed with \fBosfn_unlock\fR. The number of seconds remaining
+on the lock at the time it was removed is returned as the function value.
+ERR is returned if the file was no longer locked or had been locked by
+another user when \fBosfn_unlock\fR was called.
+
+
+.nf
+# OSFN_LOCK -- Lock the named OSFN, i.e., gain exclusive write access
+# to a file. Only the process gaining the lock on a file may write
+# to it, but there is no guarantee that another process may not read
+# a locked file. On some systems the file will not actually be locked
+# until it is opened with write permission. If multiple files exist
+# in a directory with the same root but different extensions, only one
+# can be locked at a time.
+
+long procedure osfn_lock (osfn)
+
+begin
+ # Even if file locking is provided by the OS we must determine
+ # if the file is write protected. If the file is not write
+ # protected but cannot be opened for writing our caller will
+ # conclude that the file is locked by another process.
+
+ if (file locking is handled by the OS)
+ if (file osfn is write protected)
+ error ("no write permission on file 'osfn'")
+ else
+ return (clktime)
+
+ # Generate filenames.
+ basename = osfn minus any extension
+ lockfile = strpak (basename // ".lok")
+ timelock1 = strpak (basename // ".tl1")
+ timelock2 = strpak (basename // ".tl2")
+
+ # If the lockfile can be deleted (usual case) then we have
+ # little to do.
+ if (delete (lockfile) == OK)
+ goto setlock_
+
+ # If the lockfile cannot be deleted check that the file itself
+ # exists and that we have delete permission on the directory.
+
+ if (file 'osfn' does not exist)
+ error ("attempt to lock a nonexistent file (osfn)")
+ if (no delete permission on directory)
+ error ("cannot delete file (lockfile)")
+
+ # The file exists and all the necessary permissions are granted,
+ # hence someone else has the file locked and we must wait.
+
+ repeat {
+ for (nsec=0; nsec < timeout_period; nsec=nsec+1)
+ if (delete (lockfile) == OK)
+ goto setlock_
+ if (delete (timelock1) == OK) {
+ get creation date of timelock2
+ if (timelock2 is an old file or does not exist) {
+ create (lockfile)
+ delete (timelock2); create (timelock2)
+ create (timelock1)
+ } else
+ create (timelock1)
+ } else if (continual failure to delete timelock1)
+ create (timelock1)
+ }
+
+setlock_
+ delete (timelock2)
+ create (timelock2)
+
+ return (creation time of timelock2)
+end
+
+
+# OSFN_TIMELEFT -- Determine if a file is still locked, and if so, how
+# much time remains on the lock. TIME is the time value returned when
+# the file was locked. All time values are in units of seconds.
+
+int procedure osfn_timeleft (osfn, time)
+
+begin
+ if (file locking is handled by the OS)
+ return (big number)
+
+ basename = osfn minus any extension
+ lockfile = strpak (basename // ".lok")
+ timelock2 = strpak (basename // ".tl2")
+
+ if (lockfile exists)
+ return (ERR)
+ else if (cannot get file info on timelock2)
+ return (ERR)
+ else if (timelock2.create_time != time)
+ return (ERR)
+ else {
+ timeleft = max (0, timeout_period - (clktime - time))
+ return (timeleft)
+ }
+end
+
+
+# OSFN_UNLOCK -- Release the lock on a file and return the number of
+# seconds that were left on the lock. ERR is returned if the file is
+# no longer locked or if the lock is not the one originally placed
+# on the file.
+
+int procedure osfn_unlock (osfn, time)
+
+begin
+ timeleft = osfn_timeleft (osfn, time)
+
+ if (timeleft != ERR) {
+ basename = osfn minus any extension
+ lockfile = strpak (basename // ".lok")
+ create (lockfile)
+ }
+
+ return (timeleft)
+end
+.fi
+
+.nh
+VFN Package Data Structures
+
+ A process may have only a single VFN open with write permission at any
+one time to eliminate the possibility of deadlock (section 4). Any number
+of VFN's may be open for read-only access, e.g., when recursively descending
+a directory tree. Most VFN accesses do not involve a reference to a mapping
+file. Since the mapping file is infrequently referenced, separate descriptors
+are used for the VFN and the mapping file. The VFN descriptor is called the
+VFD and the mapping file descriptor the MFD.
+
+The MFD is only allocated if a mapping file is referenced, i.e., if the OSFN
+is long. Before allocating a new MFD we must search the list of open VFN's
+to see if the mapping file has already been opened and assigned a MFD. Every
+VFN must have its own VFD. To prevent having to MALLOC a
+VFD every time a filename is mapped, one VFD will always be allocated (after
+the first file reference). Thus, for a simple filename mapping where the
+OSFN is short, no MALLOC or other kernel calls will be required, i.e., the only
+expense will be the string operations required to map the filename.
+
+
+.ks
+.nf
+# VFN Descriptor
+
+struct vfd {
+ struct mfd *v_mfd # ptr to mapping file descr.
+ int v_acmode # access mode
+ int v_len_osdir # length of v_osdir string
+ int v_len_root # length of v_root string
+ int v_len_extn # length of v_extn string
+ char v_vfn[33] # original VFN, minus LDIR
+ char v_osdir[33] # OS directory name
+ char v_root[33] # encoded root filename
+ char v_extn[33] # encoded and mapped extension
+}
+.fi
+.ke
+
+
+.ks
+.nf
+# Mapping File Descriptor. The length of the descriptor is adjusted as
+# necessary to provide storage for the filename pairs.
+
+struct mfd {
+ long m_locktime # clktime when lock set
+ int m_fd # file descriptor
+ int m_nfiles # number of files in map list
+ int m_lastop # last operation performed
+ int m_modified # was database modified
+ char m_vfnmap[] # OSFN of mapping file
+ int m_checksum # checksum of m_fnmap
+ char m_fnmap[nfiles*34*2] # vfn/osfn pairs
+}
+.fi
+.ke
+
+.nh
+Semicode for Parts of the VFN Package
+
+.nf
+# VFNOPEN -- Open that part of the VFN database associated with a particular
+# VFN. Allocate VFD descriptor, map but do not squeeze VFN to long OSFN.
+
+pointer procedure vfnopen (vfn, mode)
+
+begin
+ if (first_time) {
+ permanently allocate a VFD
+ nvfn_open = 0
+ first_time = false
+ }
+
+ # Allocate and initialize VFD.
+ if (no VFN's open) {
+ use preallocated VFD
+ increment count of open VFN's
+ } else
+ allocate a new VFD
+
+ call fbrkfn to break VFN into OSDIR, ROOT, and EXTN fields
+
+ return (pointer to VFD)
+end
+
+
+# VFNCLOSE -- Close a VFN and optionally update the VFN database. An update
+# is performed only if the mapping file is open with write permission,
+# a modify transaction has occurred, and updating is enabled.
+
+procedure vfnclose (vp, update)
+
+begin
+ # If the mapping file was not used or if it was not modified we
+ # just return the buffers and quit.
+
+ mfp = vp.mfp
+ if (mfp == NULL) {
+ if (nvfn_open > 1)
+ mfree (vp, TY_STRUCT)
+ return
+ } else if (mfp.m_modified == NO || update == NO_UPDATE) {
+ mfree (mfp, TY_STRUCT)
+ if (nvfn_open > 1)
+ mfree (vp, TY_STRUCT)
+ return
+ }
+
+ # If we get here the mapping file is open with write permission,
+ # a transaction has been performed which modified the database,
+ # and we were called with updating enabled. Rollback (repeat)
+ # the transaction if the lock has been broken or if there is not
+ # enough time remaining on the lock.
+
+ while (osfn_timeleft (mfp.m_vfnmap, mfp.m_locktime) < xx) {
+ osfn_unlock (mfp.m_vfnmap, mfp.m_locktime)
+ switch (mfp.m_lastop) {
+ case VFN_ADD:
+ vfnadd (vp, junkstr)
+ case VFN_DEL:
+ vfndel (vp, junkstr)
+ }
+ }
+
+ # Update and close the mapping file.
+ compute checksum and store in the mapping file
+ rewrite mapping file to disk
+ close (mapping file)
+
+ if (osfn_unlock (mfp.m_vfnmap, mfp.m_locktime) == ERR)
+ warn ("broken file protect lock in directory 'vp.v_osdir'")
+
+ mfree (mfp, TY_STRUCT)
+ if (nvfn_open > 1)
+ mfree (vp, TY_STRUCT)
+end
+
+
+# VFNMAP -- Map and pack the VFN into an OSFN, but do not modify the
+# database. The mapping file is accessed only if the filename is
+# degenerate.
+
+int procedure vfnmap (vp, osfn)
+
+begin
+ # If the OSFN is short or long but still unique within directory,
+ # then it is not necessary to access the mapping file.
+
+ if (root is longer than permitted by host system) {
+ squeeze root
+ if (squeezed root filename is unique within directory) {
+ concatenate and pack osfn
+ return (OK)
+ }
+ }
+
+ # If we get here then the squeezed filename is degenerate, i.e.,
+ # not unique within the directory. It is necessary to read the
+ # mapping file to learn what OSFN has been assigned to the file.
+
+ mfp = allocate and init mapping file descriptor
+ mfp.m_vfnmap = strpak (osdir // "zzvfnmap.vfn")
+
+ # Open or create the mapping file. Create must precede lock
+ # as lock will abort if the file to be locked does not exist.
+ # If opening existing file READ_WRITE, lock first to determine
+ # if we have write perm on file, then keep trying to open file
+ # until open succeeds (if OS level file locking is in use the
+ # open will return ERR as long as another process has the
+ # file open for writing).
+
+ switch (vp.v_acmode) {
+ case VFN_WRITE:
+ if (no mapping file created yet) {
+ create a new mapping file
+ time = osfn_lock (mfp.m_vfnmap)
+ } else {
+ time = osfn_lock (mfp.m_vfnmap)
+ repeat {
+ open mapping file for READ_WRITE access
+ sleep (1)
+ } until (open succeeds)
+ }
+ default:
+ open mapping file for READ_ONLY access
+ }
+
+ # Read mapping file into descriptor. Increase default size of
+ # descriptor if necessary to read entire file. Repeat the
+ # read if the checksum is invalid, indicating that a write
+ # was in progress when we read.
+
+ maxch = default buffer size for the filename map
+ repeat {
+ repeat {
+ read maxch chars into mfp.m_checksum
+ if (nchars_read >= maxch) {
+ increase size of descriptor
+ maxch = maxch + increase in storage
+ }
+ } until (nchars_read < maxch)
+ compute checksum
+ } until (checksum == mfp.m_checksum)
+
+ if (nchars_read == EOF)
+ mfp.m_nfiles = 0
+ else
+ mfp.m_nfiles = max (0, (nchars_read - SZ_INT) / SZ_FNMAP_PAIR)
+
+ search mfp.m_fnmap for filename vp.vfn
+ if (not found)
+ status = ERR
+ else {
+ status = OK
+ pack osfn to output argument
+ }
+
+ if (access_mode != VFN_WRITE)
+ close mapping file
+
+ return (status)
+end
+
+
+# VFNADD -- Map a VFN to an OSFN and add an entry for the VFN to the
+# database if the OSFN is degenerate.
+
+procedure vfnadd (vp, osfn)
+
+begin
+ # If VFNMAP does not return ERR then the file already exists.
+ # We return ERR if the file already exists.
+
+ if (vfnmap (vp, osfn) != ERR)
+ return (ERR)
+ else if (short osfn)
+ return (OK)
+
+ if (osfn is degenerate) {
+ generate a unique new_osfn
+ create degeneracy flag file osfn // ".zmd"
+ osfn = strpak (new_osfn)
+ }
+
+ add vfn,osfn pair to vp.mfp.m_fnmap
+ mfp.m_lastop = VFN_ADD
+
+ return (OK)
+end
+
+
+# VFNDEL -- Map a VFN to an OSFN and delete the entry for the VFN from
+# the database if the OSFN is degenerate. Do not delete the degeneracy
+# flag file if no longer degenerate, because even though the OSFN is
+# no longer degenerate the OSFN reflects the former degeneracy of the
+# file, and we do not want to rename the file.
+
+procedure vfndel (vp, osfn)
+
+begin
+ # If VFNMAP returns ERR then the file does not exist.
+ # We return ERR if the file does not exist.
+
+ if (vfnmap (vp, osfn) == ERR)
+ return (ERR)
+ else if (short osfn)
+ return (OK)
+
+ delete vfn,osfn pair from vp.mfp.m_fnmap
+ mfp.m_lastop = VFN_DEL
+
+ return (OK)
+end
+
+
+# FBRKFN -- Transform a VFN into an OSDIR, an escape sequence encoded and
+# extension mapped root OS filename ROOT, and an extension EXTN. The root
+# may be longer than permitted by the host OS, i.e., squeezing is not done
+# here.
+
+procedure fbrkfn (vfn, osdir, lenosdir, root, lenroot, extn, lenextn)
+
+begin
+ # If the VFN begins with an OSDIR prefix it is assumed to be an OSFN
+ # and no mapping is performed.
+
+ call ZFXDIR to extract osdir prefix, if any
+ if (osdir prefix found) {
+ copy remainder of vfn to root
+ return
+ }
+
+ osdir = null_string
+ root = null_string
+ extn = null_string
+
+ # Process the directory and filename fields. In the case of a simple
+ # filename the first pass performs the escape sequence encoding of the
+ # filename directly into root, and we return after possibly mapping
+ # the extension.
+
+ repeat {
+ extract next field into root and extn with escape sequence encoding
+ if (delimiter == '$') {
+ if (osdir == null_string) {
+ osdir = recursively expand ldir
+ if (ldir not found)
+ error ("logical directory 'ldir' not found")
+ } else
+ error ("illegal $ delimiter in filename 'vfn'")
+ } else if (delimiter == '/')
+ fold field, a subdirectory, into osdir
+ } until (delimiter == EOS)
+
+ # At this point we have osdir, root, and extn strings, any of which may
+ # be null. If more than one "." delimited extn string was encountered
+ # during escape sequence encoding, or if the maximum extn length was
+ # exceeded, then that extn will already have been incorporated into the
+ # root.
+
+ if (extn != null_string)
+ map filename extension
+end