diff options
author | Joseph Hunkeler <jhunkeler@gmail.com> | 2015-07-08 20:46:52 -0400 |
---|---|---|
committer | Joseph Hunkeler <jhunkeler@gmail.com> | 2015-07-08 20:46:52 -0400 |
commit | fa080de7afc95aa1c19a6e6fc0e0708ced2eadc4 (patch) | |
tree | bdda434976bc09c864f2e4fa6f16ba1952b1e555 /pkg/tbtables/selector/tcsopen.x | |
download | iraf-linux-fa080de7afc95aa1c19a6e6fc0e0708ced2eadc4.tar.gz |
Initial commit
Diffstat (limited to 'pkg/tbtables/selector/tcsopen.x')
-rw-r--r-- | pkg/tbtables/selector/tcsopen.x | 818 |
1 files changed, 818 insertions, 0 deletions
diff --git a/pkg/tbtables/selector/tcsopen.x b/pkg/tbtables/selector/tcsopen.x new file mode 100644 index 00000000..f50ba282 --- /dev/null +++ b/pkg/tbtables/selector/tcsopen.x @@ -0,0 +1,818 @@ +include <tbset.h> +include "tcs.h" + +define MAX_STACK 8 # max file depth in column list +define DELIM ',' # column name separator +define COMMENT '#' # comment character +define ESCAPE '\\' # escape character +define SQUOTE '\'' # single quote +define DQUOTE '"' # double quote +define LPAREN '(' # left parenthesis +define RPAREN ')' # right parenthesis +define NEWLINE '\n' # end of line character +define NOTWHITE ($1 > ' ') # private definition of white space + +.help tcs_open +.nf___________________________________________________________________________ +Table column selector + +This file contains procedures to expand a list of column names into an +array of column descriptors which match the list. The list is a list +of column patterns separated by commas. The column pattern is either +a column name, a file name containing a list of column names, or a +pattern using the usual IRAF pattern matching syntax. For example, the +string + + a[1-9], b, time*, @column.lis + +would be expanded as the column names a1 through a9, b, any column +name beginning with "time", and all the column names in the file +column.lis. If the column list is entirely whitespace, the array of +column descriptors will include all the columns in the table, as this +seems the most reasonable default. If the first non-white character is +the negation character (either ~ or !), the array of column descriptors +will include all columns not matched by the list. The negation character +only has this meaning at the beginning of the list. + +Column names may also contain array sections having the same format +as image sections. The sections are surrounded by parentheses. For example + + spec(1:200:2) image(*,30) spec (20:*) + +are valid array sections. + +.endhelp______________________________________________________________________ + +# TCS_OPEN -- Convert a list of column names to a list of descriptors + +procedure tcs_open (tp, columns, descrip, ndescrip, maxdescrip) + +pointer tp # i: table descriptor +char columns[ARB] # i: list of column names +pointer descrip[ARB] # o: list of column array selectors +int ndescrip # o: number of column array selectors +int maxdescrip # i: length of descrip array +#-- +bool negate, file +int ncols, top, fd_stack[MAX_STACK] +pointer sp, token, pattern, section, errmsg + +string overflow "Column list has too many nested files" + +bool tcs_hasmeta() +int tcs_token(), strlen(), stropen(), open() + +errchk tcs_patmatch + +begin + # Allocate memory for temporary strings + + call smark (sp) + call salloc (token, SZ_FNAME, TY_CHAR) + call salloc (pattern, SZ_FNAME, TY_CHAR) + call salloc (section, SZ_FNAME, TY_CHAR) + call salloc (errmsg, SZ_LINE, TY_CHAR) + + # Keep track of the number of column patterns and the negation + # pattern. At the end of the procedure, if no patterns were read, + # the list is blank, which signifies all columns should be used. + # If the negation character is encountered, the list of columns + # to use is inverted. + + ncols = 0 + negate = false + + # Initialize the number of columns matched to zero + + ndescrip = 0 + + # Since the column list may contain filenames, which in turn will + # contain other lists, we use a stack of file descriptors to keep + # track of the current file. The column list is also opened as a + # file, for the sake of generality in the code. + + top = 1 + file = false + fd_stack[1] = stropen (columns, strlen(columns), READ_ONLY) + + while (top > 0) { + # The tokenizer either returns a negation character (! or ~) + # a filename (preceded by a @) or a column name. Tokens, + # except for the negation character, are separated by commas. + + while (tcs_token (fd_stack[top], file, Memc[token], SZ_FNAME) > 0){ + ncols = ncols + 1 + + if (Memc[token] == '!') { + # Negation character. Only is significant as first + # character in the column list. + + negate = (ncols == 1) + ncols = ncols - 1 + + } else if (Memc[token] == '@') { + # Filename. Open the file and push it on the stack. + + if (top == MAX_STACK) + call error (1, overflow) + + top = top + 1 + fd_stack[top] = open (Memc[token+1], READ_ONLY, TEXT_FILE) + + ncols = ncols - 1 + + } else { + # Column pattern. Remove the section from the pattern + + call tcs_breakname (Memc[token], Memc[pattern], + Memc[section]) + + # Look for metacode characters. If found, call the + # pattern matching routine, otherwise call the string + # matching routine. The division between the routines + # is for reasons of efficiency. + + call strlwr (Memc[pattern]) + + if (tcs_hasmeta (Memc[pattern], SZ_FNAME)) { + call tcs_patmatch (tp, Memc[pattern], Memc[section], + descrip, ndescrip, maxdescrip) + } else { + call tcs_strmatch (tp, Memc[pattern], Memc[section], + descrip, ndescrip, maxdescrip) + } + } + + file = top > 1 + } + + # All columns have been read from this file, + # so pop it from the stack + + call close (fd_stack[top]) + top = top - 1 + } + + # A blank list signifies select all columns from the table + + if (ncols == 0) + call tcs_allcols (tp, descrip, ndescrip, maxdescrip) + + # The negation character signifies those columns not in the list + # should be selected + + if (negate) + call tcs_invert (tp, descrip, ndescrip, maxdescrip) + + call sfree (sp) +end + +# TCS_TOKEN -- Extract the next token from a column list + +int procedure tcs_token (fd, file, token, maxch) + +int fd # i: descriptor of file containing column list +bool file # i: is the read coming from a file? +char token[ARB] # o: token string +int maxch # i: declared length of token string +#-- +char ch +int nc, endch, paren + +char getc() + +begin + # Eat leading whitespace and delimeters + + repeat { + ch = getc (fd, ch) + + # Eat comment if we are reading from a file + + if (ch == COMMENT && file) { + repeat { + ch = getc (fd, ch) + } until (ch == EOF || ch == NEWLINE) + } + + } until (ch == EOF || (NOTWHITE(ch) && ch != DELIM)) + + + # Leading character determines rest of processing + + if (ch == EOF) { + # End of file. Return null string + token[1] = EOS + return (0) + + } else if (ch == '!' || ch == '~') { # ~ added on 1999 Jan 29 + # Negation character. Return the character. + + token[1] = '!' # same token for both negation characters + token[2] = EOS + return (1) + + } else if (ch == '@') { + # A filename. Return all characters up to whitespace or + # the next delimeter. + + nc = 1 + while (NOTWHITE(ch) && ch != DELIM) { + if (nc <= maxch) { + token[nc] = ch + nc = nc + 1 + } + + ch = getc (fd, ch) + } + + token[nc] = EOS + return (nc - 1) + + } else if (ch == SQUOTE || ch == DQUOTE){ + # A quoted string. Return all characters up to and including + # the closing quote. + + endch = ch + + nc = 1 + repeat { + if (nc < maxch) { + token[nc] = ch + nc = nc + 1 + } + + ch = getc (fd, ch) + } until (ch == EOF || ch == endch) + + token[nc] = endch + token[nc+1] = EOS + return (nc) + + } else { + # An ordinary column name. Return all characters up to the next + # whitespace or delimeter. Delimeters inside parentheses + # are part of the column section and are not treated as delimeters. + + nc = 1 + paren = 0 + while (NOTWHITE(ch) && (paren > 0 || ch != DELIM)) { + if (nc <= maxch) { + token[nc] = ch + nc = nc + 1 + } + + if (ch == LPAREN) { + paren = paren + 1 + } else if (ch == RPAREN) { + paren = paren - 1 + } + + ch = getc (fd, ch) + } + + token[nc] = EOS + return (nc - 1) + } + +end + +# TCS_BREAKNAME -- Break a column name into root and section + +procedure tcs_breakname (name, root, section) + +char name[ARB] # i: column name +char root[ARB] # o: root (everything up to the parentheses) +char section[ARB] # o: section (everything in the parentheses) +#-- +int ic, jc, kc, paren, state + +begin + jc = 1 + kc = 1 + paren = 0 + state = 1 + + # There are three states: Before the first parenthesis + # where characters are copied to the root, inside the + # parentheses where characters are copied to the section + # and after the parentheses where characters are again + # copied to the root. The variable paren keeps track of + # parentheses so we can transition between the second and + # third state at the parenthesis that matches the first. + + for (ic = 1; name[ic] != EOS; ic = ic + 1) { + if (state == 1) { + if (name[ic] == LPAREN) { + section[kc] = name[ic] + kc = kc + 1 + + state = 2 + paren = 1 + } else { + root[jc] = name[ic] + jc = jc + 1 + } + + } else if (state == 2) { + if (paren == 0) { + state = 3 + } else { + # Whitespace is not copied to the section + + if (NOTWHITE(name[ic])) { + section[kc] = name[ic] + kc = kc + 1 + } + + if (name[ic] == LPAREN) { + paren = paren + 1 + } else if (name[ic] == RPAREN) { + paren = paren - 1 + } + } + } else if (state == 3) { + root[jc] = name[ic] + jc = jc +1 + } + } + + root[jc] = EOS + section[kc] = EOS + +end + +# TCS_HASMETA -- Check for presence of metacharacters + +bool procedure tcs_hasmeta (pattern, maxch) + +char pattern[ARB] # u: character string +int maxch # i: declared length of pattern +#-- +bool meta +int ic, jc +pointer sp, buffer + +int stridx() + +begin + # If the pattern is enclosed in quotes, all characters are + # interpreted as literals. Strip quotes from the pattern and + # return false. + + if (pattern[1] == SQUOTE || pattern[1] == DQUOTE) { + for (ic = 1; pattern[ic] != EOS; ic = ic + 1) + pattern[ic] = pattern[ic+1] + + pattern[ic-2] = EOS + return (false) + } + + # Copy the pattern to a temporary buffer + + call smark (sp) + call salloc (buffer, maxch, TY_CHAR) + + jc = 0 + meta = false + for (ic = 1; pattern[ic] != EOS; ic = ic + 1) { + + if (pattern[ic] == ESCAPE && pattern[ic+1] != EOS) { + # Copy escape sequences but do not count as metacharacters + + ic = ic + 1 + if (jc <= maxch) { + Memc[buffer+jc] = ESCAPE + jc = jc + 1 + } + + } else if (pattern[ic] == '*') { + # Convert '*' to '?*', count as metacharacter + + meta = true + if (jc <= maxch) { + Memc[buffer+jc] = '?' + jc = jc + 1 + } + + } else if (stridx (pattern[ic], "[?{") > 0) { + # Check for other metacharacters + + meta = true + } + + if (jc <= maxch) { + Memc[buffer+jc] = pattern[ic] + jc = jc + 1 + } + } + + Memc[buffer+jc] = EOS + + if (meta) { + # Enclose pattern in "^pattern$" to force match + # of entire column name + + call sprintf (pattern, maxch, "^%s$") + call pargstr (Memc[buffer]) + + } else { + # Remove escape characters from pattern + # if there are no metacharacters + + jc = 1 + for (ic = 0; Memc[buffer+ic] != EOS; ic = ic + 1) { + if (Memc[buffer+ic] == ESCAPE && Memc[buffer+ic+1] != EOS) + ic = ic + 1 + + pattern[jc] = Memc[buffer+ic] + jc = jc + 1 + } + + pattern[jc] = EOS + } + + call sfree (sp) + return (meta) +end + +# TCS_PATMATCH -- Match column names containing metacharacters + +procedure tcs_patmatch (tp, pattern, section, descrip, ndescrip, maxdescrip) + +pointer tp # i: table descriptor +char pattern[ARB] # i: pattern to match +char section[ARB] # i: array section +pointer descrip[ARB] # u: list of column array selectors +int ndescrip # u: number of column array selectors +int maxdescrip # i: length of descrip array +#-- +int icol, ncols, id +pointer sp, buffer, colname, errmsg, cp + +string badpattern "Syntax error in wildcard pattern (%s)" + +int tbpsta(), patmake(), patmatch() +pointer tbcnum() + +errchk tcs_fillstruct + +begin + # Allocate temporary strings + + call smark (sp) + call salloc (buffer, SZ_LINE, TY_CHAR) + call salloc (colname, SZ_COLNAME, TY_CHAR) + call salloc (errmsg, SZ_LINE, TY_CHAR) + + # Compile the pattern + + if (patmake (pattern, Memc[buffer], SZ_LINE) == ERR) { + call sprintf (Memc[errmsg], SZ_LINE, badpattern) + call pargstr (pattern) + call error (1, Memc[errmsg]) + } + + # Look at each column name to see if it matches the pattern. + # If the pattern matches, add it to the list if the column + # has not already been matched. + + ncols = tbpsta (tp, TBL_NCOLS) + + do icol = 1, ncols { + # Get column name from column number + + cp = tbcnum (tp, icol) + call tbcigt (cp, TBL_COL_NAME, Memc[colname], SZ_COLNAME) + call strlwr (Memc[colname]) + + # Pattern matching test + + if (patmatch (Memc[colname], Memc[buffer]) > 0) { + # Check to see if already matched + + for (id = 1; id <= ndescrip; id = id + 1) { + if (cp == TCS_COLUMN(descrip[id])) + break + } + + # Add to array if not already matched and array not full + + if (id > ndescrip && ndescrip < maxdescrip) { + ndescrip = ndescrip + 1 + call tcs_fillstruct (tp, cp, section, descrip[ndescrip]) + } + } + } + + call sfree (sp) +end + +# TCS_STRMATCH -- Match column names to table columns + +procedure tcs_strmatch (tp, pattern, section, descrip, ndescrip, maxdescrip) + +pointer tp # i: table descriptor +char pattern[ARB] # i: pattern to match +char section[ARB] # i: array section +pointer descrip[ARB] # u: list of column array selectors +int ndescrip # u: number of column array selectors +int maxdescrip # i: length of descrip array +#-- +int id +pointer cp + +errchk tcs_fillstruct + +begin + # Find column pointer corresponding to column name + + call tbcfnd (tp, pattern, cp, 1) + + if (cp == NULL) + return + + # Check to see if already matched + + for (id = 1; id <= ndescrip; id = id + 1) { + if (cp == TCS_COLUMN(descrip[id])) + break + } + + # Add to array if not already matched and array not full + + if (id > ndescrip && ndescrip < maxdescrip) { + ndescrip = ndescrip + 1 + call tcs_fillstruct (tp, cp, section, descrip[ndescrip]) + } +end + +# TCS_FILLSTRUCT -- Fill structure with info about the column + +procedure tcs_fillstruct (tp, cp, section, descrip) + +pointer tp # i: table descriptor +pointer cp # i: column descriptor +char section[ARB] # i: column array section +pointer descrip # i: column array selector +#-- +int ic, idim, ndim, first, last, inc, axlen[MAXDIM] + +string baddimen "Dimension of section does not match column" + +int tcs_getsect() +errchk tcs_getsect + +begin + # Get dimension of array and length of each axis + + call tbciga (tp, cp, ndim, axlen, MAXDIM) + + # Allocate column selector descriptor + + call malloc (descrip, TCS_LENGTH(ndim), TY_INT) + + if (section[1] == EOS) { + # If there is no section, copy the array dimensions + # to the descriptor + + + do idim = 1, ndim { + TCS_FIRST(descrip,idim) = 1 + TCS_LAST(descrip,idim) = axlen[idim] + TCS_INC(descrip,idim) = 1 + } + + } else { + # If there is a section, parse it and copy it to descriptor + + ic = 2 + do idim = 1, ndim { + if (tcs_getsect (section, ic, first, last, inc) <= 0){ + # Not enough dimensions in section + + call mfree (descrip, TY_INT) + call error (1, baddimen) + } + + TCS_FIRST(descrip,idim) = first + TCS_INC(descrip,idim) = inc + + # Indef indicates an asterisk in the section, for which + # we substitute the actual array dimension + + if (IS_INDEFI (last)) { + TCS_LAST(descrip,idim) = axlen[idim] + } else { + TCS_LAST(descrip,idim) = last + } + } + + # It is an error if the section has more dimensions than the array + + if (section[ic] != EOS) { + call mfree (descrip, TY_INT) + call error (1, baddimen) + } + } + + # Eliminate spurious dimensions from the array + + for (idim = ndim; idim > 0; idim = idim - 1) { + if (axlen[idim] > 1) + break + } + + ndim = idim + + # Save the column pointer and number of dimensions in the descriptor + + TCS_COLUMN(descrip) = cp + TCS_DIMEN(descrip) = ndim + +end + +# TCS_GETSECT -- Parse the array section string + +int procedure tcs_getsect (section, ic, first, last, inc) + +char section[ARB] # i: section string +int ic # u: starting character in string +int first # o: first element in array +int last # o: last element in array +int inc # o: array increment +#-- +bool done +int jc, nc, ival, old_ic, value +pointer sp, number + +bool streq() +int stridx(), ctoi() + +string badsect "Syntax error in array section" + +begin + # Temporary string to hold numeric token + + call smark (sp) + call salloc (number, SZ_FNAME, TY_CHAR) + + # Set defaults for outputs + + first = 1 + last = 1 + inc = 1 + + # Read charcaters from section until a delimeter is found. + # Then check to see if it is a wildcard. If not, convert it + # to a number and set the appropriate output. + + jc = 0 + ival = 1 + old_ic = ic + done = false + + while (! done && section[ic] != EOS) { + if (stridx (section[ic], "(),:") == 0) { + # Copy characters until delimeter + + Memc[number+jc] = section[ic] + jc = jc + 1 + + } else { + Memc[number+jc] = EOS + + if (streq (Memc[number], "*")) { + last = INDEFI + + } else { + # Convert string to number + + jc = 1 + nc = ctoi (Memc[number], jc, value) + + # Check for trailing non-numeric chars + + if (Memc[number+nc] != EOS) + call error (1, badsect) + + # Set appropriate output + + switch (ival) { + case 1: + first = value + case 2: + last = value + if (last < first) + call error (1, badsect) + case 3: + inc = value + default: + call error (1, badsect) + } + + ival = ival + 1 + } + + # Reset to read next string + + jc = 0 + + # Exit loop when delimeter or closing parenthesis seen + + done = (section[ic] == DELIM || section[ic] == RPAREN) + } + + ic = ic + 1 + } + + # A single number indicates one element in the array + + if (last == 1 && first > 1) + last = first + + call sfree (sp) + return (ic - old_ic) + +end + +# TCS_ALLCOLS -- Get descriptors for all columns in the table + +procedure tcs_allcols (tp, descrip, ndescrip, maxdescrip) + +pointer tp # i: table descriptor +pointer descrip[ARB] # o: list of column array selectors +int ndescrip # o: number of column array selectors +int maxdescrip # i: length of descrip array +#-- +int icol, ncols +pointer cp + +int tbpsta() +pointer tbcnum() + +begin + ncols = tbpsta (tp, TBL_NCOLS) + ncols = min (ncols, maxdescrip) + + do icol = 1, ncols { + cp = tbcnum (tp, icol) + + ndescrip = ndescrip + 1 + call tcs_fillstruct (tp, cp, "", descrip[ndescrip]) + } + +end + +# TCS_INVERT -- Get descriptors for all columns not currently in list + +procedure tcs_invert (tp, descrip, ndescrip, maxdescrip) + +pointer tp # i: table descriptor +pointer descrip[ARB] # o: list of column array selectors +int ndescrip # o: number of column array selectors +int maxdescrip # i: length of descrip array +#-- +int id, icol, jcol, ncols +pointer cp, sp, clist + +int tbpsta() +pointer tbcnum() + +begin + # Allocate temporary array for column list + + ncols = tbpsta (tp, TBL_NCOLS) + + call smark (sp) + call salloc (clist, ncols, TY_INT) + + # Get each column pointer and search column selectors for a match + # If none is, found, copy the pointer to the column list + + jcol = 0 + do icol = 1, ncols { + cp = tbcnum (tp, icol) + for (id = 1; id <= ndescrip; id = id + 1) { + if (TCS_COLUMN(descrip[id]) == cp) + break + } + + if (id > ndescrip) { + Memi[clist+jcol] = cp + jcol = jcol + 1 + } + } + + # Free the old descriptors + + call tcs_close (descrip, ndescrip) + + # Get the column descriptors for the columns in the list + + ndescrip = min (jcol, maxdescrip) + do id = 1, ndescrip + call tcs_fillstruct (tp, Memi[clist+id-1], "", descrip[id]) + + call sfree (sp) +end |