Initial commit

author: Joseph Hunkeler <jhunkeler@gmail.com> 2015-07-08 20:46:52 -0400
committer: Joseph Hunkeler <jhunkeler@gmail.com> 2015-07-08 20:46:52 -0400
commit: fa080de7afc95aa1c19a6e6fc0e0708ced2eadc4 (patch)
tree: bdda434976bc09c864f2e4fa6f16ba1952b1e555 /pkg/cl/lexicon.c
download: iraf-linux-fa080de7afc95aa1c19a6e6fc0e0708ced2eadc4.tar.gz
1 files changed, 655 insertions, 0 deletions
diff --git a/pkg/cl/lexicon.c b/pkg/cl/lexicon.c
new file mode 100644
index 00000000..5a600d01
--- /dev/null
+++ b/pkg/cl/lexicon.c
@@ -0,0 +1,655 @@
+/* Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
+ */
+
+#define import_spp
+#define	import_libc
+#define	import_ctype
+#define	import_xnames
+#define	import_lexnum
+#include <iraf.h>
+
+#include "proto.h"
+
+
+extern	int	cldebug;
+
+/*
+ * NOTE: This file is #included in the parser and inherits the parser global
+ * declarations.
+ */
+
+#define	LEXDEBUG	1
+#define	newtoken	(yyleng==0)
+
+int	_lexmodes;		/* nonzero enables mode switching	*/
+int	lexdebug=0;		/* debug lexical analyzer		*/
+int	lexcol=0;		/* nchars since \n or ;			*/
+int	pbtoken;		/* push back token			*/
+int	newarg;			/* whitespace argument delimiter seen	*/
+int	lhs;			/* "left hand side" switch for []	*/
+
+/* YYLEX -- Return the next token from the input stream.  Two separate lexical
+ * analyzers are provided, the "command mode" lexical analyzer for interactive
+ * command entry, and the "compute mode" analyzer for more sophisticated
+ * applications.  The nesting level of parentheses and braces is used to switch
+ * between the two modes.  When the paren level is nonzero compute mode is in
+ * effect.  Mode switching may be defeated by setting the external variable
+ * _lexmodes to zero.  A single parser accepts input from both lexical
+ * analyzers.
+ */
+yylex()
+{
+	register int	token;
+
+	if (_lexmodes && parenlevel == 0 && bracelevel < PBRACE) {
+	    while (!(token = lexicon()))
+		if (yywrap())
+		    break;
+	} else
+	    token = lex_yylex();
+
+	if (!lexdebug)
+	    return (token);
+
+#if LEXDEBUG
+	switch (token) {
+	case Y_CONSTANT:
+	    eprintf ("CONSTANT ");
+	    fprop (stderr, reference (operand, yylval));
+	    eprintf ("\n");
+	    break;
+	case Y_IDENT:
+	    eprintf ("IDENT ");
+	    fprop (stderr, reference (operand, yylval));
+	    eprintf ("\n");
+	    break;
+	case Y_OSESC:
+	    eprintf ("Y_OSESC ");
+	    fprop (stderr, reference (operand, yylval));
+	    eprintf ("\n");
+	    break;
+	case Y_APPEND:
+	    eprintf ("Y_APPEND\n");
+	    break;
+	case Y_ALLAPPEND:
+	    eprintf ("Y_ALLAPPEND\n");
+	    break;
+	case Y_ALLREDIR:
+	    eprintf ("Y_ALLREDIR\n");
+	    break;
+	case Y_GSREDIR:
+	    eprintf ("Y_GSREDIR\n");
+	    break;
+	case Y_ALLPIPE:
+	    eprintf ("Y_ALLPIPE\n");
+	    break;
+	case Y_NEWLINE:
+	    eprintf ("NEWLINE\n");
+	    break;
+	default:
+	    eprintf ("`%c'\n", token);
+	    break;
+	}
+#endif
+
+	return (token);
+}
+
+
+/* LEXICON -- Simple "conversational mode" lexical analyser.  Lexical analysis
+ * in the CL is carried out by a dual mode lexical analyser.  In conversational
+ * mode there are few tokens and few special characters; arguments are
+ * delimited by whitespace and may contain nonalphanumeric characters.  Few
+ * strings have to be quoted.  In computational mode the arithmetic operators
+ * are recognized and arguments must be delimited by commas.  Computational
+ * mode is in effect whenever the parenlevel is nonzero.
+ *
+ * The two modes are implemented with two separate lexical analyzers.  Gettok
+ * implements conversational mode, while computational mode is implemented with
+ * a LEX finite state automaton.  Gettok recognizes the following special chars:
+ *
+ *	[ \t]				argument delimiter
+ *	["']				string
+ *	\n				newline
+ *	\				single character escape
+ *	!				os escape
+ *	#				comment
+ *	&				spawn background job
+ *	(				lparen
+ *	+				plus (switch)
+ *	-				minus (switch)
+ *	;				eost
+ *	=				equals
+ *	+=				add and set
+ *	-=				subtract and set
+ *	*=				multiply and set
+ *	/=				divide and set
+ *	<				redirin
+ *	>				redir
+ *	>&				allredir
+ *	>>				append
+ *	>>&				allappend
+ *	>(G|I|P|)+			graphics stream redirection
+ *	{				lbrace
+ *	|				pipe
+ *	|&				allpipe
+ *	}				rbrace
+ *	[				beginning of index list
+ *	]				end of index list
+ *
+ * The history metacharacter ^ is processed before input is passed to the
+ * lexical analyser.  Any sequence of nonwhite characters that does not form
+ * one of the recognized tokens is returned as a string.
+ */
+lexicon()
+{
+	char	*bkgerr = "ERROR: cannot submit background job inside {}\n";
+	register int	ch, cch;
+	register int	token;
+	int	stringtok, identifier, setlevel;
+	int	clswitch;
+	char	*op, *index();
+
+	/* Return pushed back token if any.
+	 */
+	if (pbtoken) {
+	    token = pbtoken;
+	    pbtoken = 0;
+	    return (token);
+	}
+
+	/* Skip leading whitespace.  If whitespace is seen and we are in an
+	 * argument list (according to the parser) set flag to output the
+	 * comma argument delimiter if the next token begins an argument.
+	 * If whitespace or = is seen (except whitespace at the beginning of
+	 * a command) then set LHS to false, turning [] off as conversational
+	 * mode metacharacters (they will be automatically turned on when
+	 * compute mode is entered in an expression).
+	 */
+	while (ch = input())
+	    if (ch == ' ' || ch == '\t') {
+space:		if (lexcol > 0)
+		    lhs = 0;
+		if (inarglist)
+		    newarg++;
+	    } else if (ch == '\\') {
+		if ((ch = input()) != '\n') {
+		    unput (ch);
+		    break;
+		} else
+		    goto space;
+	    } else
+		break;
+	
+
+	/* Start new token.
+	 */
+	if (ch) {
+	    unput (ch);
+	    yyleng = 0;
+	    if (!inarglist)
+		newarg = 0;
+	} else
+	    return (0);
+
+
+	/* Identify and accumulate next token.  Simple tokens are returned as
+	 * integer constants, more complex tokens as operand structures in
+	 * yylval.
+	 */
+	while (ch = input()) {
+	    lexcol++;
+
+	    switch (ch) {
+	    case '&':
+		/* An ampersand triggers bkg execution in command mode, unless
+		 * it occurs in a token such as >& or >>&, in which case we
+		 * never get here.
+		 */
+		if (!newtoken) {
+		    unput (ch);
+		    goto tokout_;
+		} else {
+		    while (ch = input()) {
+			if (ch == ' ' || ch == '\t')
+			    continue;
+			else {
+			    char   bkgmsg[SZ_LINE+1];
+			    int    n = SZ_LINE;
+
+			    op = bkgmsg;
+			    unput (ch);
+			    if (bracelevel) {
+				eprintf (bkgerr);
+				return ('#');
+			    }
+
+			    while (--n >= 0 && (*op = input()) != '\n')
+				op++;
+			    *op = EOS;
+			    bkg_init (bkgmsg);
+			    return (Y_NEWLINE);
+			}
+		    }
+		    return (0);
+		}
+
+	    case ';':
+	    case '\n':
+		lexcol = 0;
+		lhs = 1;
+		goto etok_;
+
+	    case '\t':
+	    case ' ':
+		if (lexcol > 0)
+		    lhs = 0;
+		goto etok_;
+
+	    case '[':
+	    case ']':
+		/* [] are recognized as command mode metacharacters only
+		 * on the left hand side of an assignment statement.
+		 */
+		if (!lhs)
+		    goto deposit_;
+		/* Fall through */
+
+	    case '{':
+	    case '}':
+		/* We want to distinguish here between the use of {} for
+		 * the set selection operator in template strings, and the
+		 * conventional compound statement operator.  The distinction
+		 * is that { is recognized as a token only if occurs at the
+		 * beginning of a token, and } is recognized as a separate
+		 * token when inside a token only if it matches a { in the
+		 * same token.  Hence, alpha{xxx} is a single token in command
+		 * mode, whereas {xxx} is 3 tokens, the same as { xxx },
+		 * and xxx} is the same as xxx }.  Usage is completely
+		 * unambiguous if the { or } is preceded by a space.
+		 */
+		if (newtoken)
+		    return (ch);
+		if (stringtok) {
+		    if (ch == '{')
+			setlevel++;
+		    else if (setlevel == 0)
+			goto etok_;		/* } does not match { */
+		    else
+			--setlevel;
+		    goto deposit_;
+		}
+		/* fall through */
+
+	    case '=':
+etok_:		if (!newtoken) {
+		    unput (ch);
+		    goto tokout_;
+		} else if (ch == '\n') {
+		    return (Y_NEWLINE);
+		} else if (ch == '=') {
+		    token = ch;
+		    lhs = 0;
+		    goto eatwhite_;
+		} else
+		    return (ch);
+
+	    case '?':
+		/* ?, ?? menu commands, recognized only at beginning of stmt */
+		if (lexcol > 1) {
+		    goto deposit_;
+		} else if (ch = input()) {
+		    if (ch == '?')
+			return (crackident ("??"));
+		    else {
+			unput (ch);
+			return (crackident ("?"));
+		    }
+		} else
+		    return (0);
+
+	    case '+':
+	    case '-':
+		/* Plus and minus are recognized as the switch operators for
+		 * boolean parameters only if encountered while accumulating
+		 * a token and if followed by an argument delimiter, i.e.,
+		 * space, tab, newline, or semicolon.  If found at the beginning
+		 * of a token they are returned as a separate token and will be
+		 * interpreted by the parser as unary plus or minus.
+		 */
+		if (newtoken) {
+		    if (newarg) {
+			cch = input();
+			if (cch == 0)
+			    return (0);
+			unput (cch);
+
+			if (ch == '-' && isdigit (cch)) {
+			    unput (ch);
+			    newarg = 0;
+			    return (',');
+			} else {
+			    /* Not number; treat +- as a string char.
+			     */
+			    goto deposit_;
+			}
+
+		    } else {
+			cch = input();
+			if (cch == 0)
+			    return (0);
+
+			if (cch == '=') {
+			    if (ch == '+')
+				return (YOP_AOADD);
+			    else
+				return (YOP_AOSUB);
+			} else if (isdigit (cch)) {
+			    unput (cch);
+			    return (ch);
+			} else {
+			    unput (cch);
+			    goto deposit_;
+			}
+		    }
+
+		} else if (cch = input()) {
+		    clswitch = (isspace (cch) || cch == ';');
+		    if (cch == '=') {
+			unput(cch);
+			unput (ch);
+			goto tokout_;
+		    }
+		    unput (cch);
+		    if (clswitch) {
+			pbtoken = ch;
+			goto tokout_;
+		    } else
+			goto deposit_;
+		} else
+		    return (0);
+
+	    case '"':
+	    case '\'':
+		if (!newtoken) {
+		    unput (ch);
+		    goto tokout_;
+		} else if (newarg) {
+		    unput (ch);
+		    newarg = 0;
+		    return (',');
+		} else {
+		    traverse (ch);
+		    yylval = addconst (yytext, OT_STRING);
+		    return (Y_CONSTANT);
+		}
+
+	    case '\\':
+		if (ch = input()) {
+		    if (ch == '\n')
+			continue;
+		    else if (index ("&;=+-\"'\\#><()|", ch) != NULL)
+			goto deposit_;		/* put ch in string */
+		    else
+			goto escape_;		/* put \ch in string */
+		} else
+		    return (0);
+
+	    case '!':
+		/* OS escape is only recognized when the ! occurs as the first
+		 * token in a statement.
+		 */
+		if (lexcol > 1)
+		    goto deposit_;
+
+		/* Accumulate command.  Newline may be escaped to enter a long
+		 * command, but all other escapes are passed on unmodified.
+		 */
+		while ((ch = input()) && ch != '\n') {
+		    if (ch == '\\')
+			if (ch = input()) {
+			    if (ch == '\n')
+				continue;
+			    else
+				yytext[yyleng++] = '\\';
+			} else
+			    break;
+		    yytext[yyleng++] = ch;
+		}
+		if (ch)
+		    unput (ch);
+
+		yytext[yyleng] = '\0';
+		yylval = addconst (yytext, OT_STRING);
+		return (Y_OSESC);
+
+	    case '#':
+		/* Discard the comment line. */
+		while ((ch = input()) && ch != '\n')
+		    ;
+		if (ch) {
+		    unput (ch);
+		    continue;
+		} else
+		    return (0);
+
+	    case '>':
+	    case '<':
+	    case '(':
+		/* These characters are alike in that they all begin a new
+		 * argument when found in an argument list.
+		 */
+		if (!newtoken) {
+		    unput (ch);
+		    goto tokout_;
+		} else if (newarg) {
+		    unput (ch);
+		    newarg = 0;
+		    return (',');
+		} else if (ch == '<') {
+		    token = ch;
+		    goto eatwhite_;
+
+		} else if (ch == '>') {
+		    ch = input();
+		    if (ch == 0) {
+			return ('>');
+
+		    } else if (ch == '>') {
+			ch = input();
+			if (ch == 0) {
+			    return (Y_APPEND);
+			} else if (ch == 'G' || ch == 'I' || ch == 'P') {
+			    op = yytext;
+			    *op++ = '>';
+			    *op++ = '>';
+			    *op++ = ch;
+			    goto gsredir_;
+			} else if (ch == '&') {
+			    token = Y_ALLAPPEND;
+			    goto eatwhite_;
+			} else {
+			    unput (ch);
+			    token = Y_APPEND;
+			    goto eatwhite_;
+			}
+
+		    } else if (ch == 'G' || ch == 'I' || ch == 'P') {
+			/* Graphics stream redirection.
+			 */
+			op = yytext;
+			*op++ = '>';
+			*op++ = ch;
+gsredir_:
+			ch = input();
+			while (ch == 'G' || ch == 'I' || ch == 'P') {
+			    *op++ = ch;
+			    ch = input();
+			}
+			unput (ch);
+			*op = EOS;
+
+			yylval = addconst (yytext, OT_STRING);
+			token = Y_GSREDIR;
+			goto eatwhite_;
+
+		    } else if (ch == '&') {
+			token = Y_ALLREDIR;
+			goto eatwhite_;
+		    } else {
+			unput (ch);
+			token = '>';
+			goto eatwhite_;
+		    }
+
+		} else
+		    return ('(');
+
+	    case '|':
+		if (!newtoken) {
+		    unput (ch);
+		    goto tokout_;
+		} else if (ch = input()) {
+		    if (ch == '&')
+			return (Y_ALLPIPE);
+		    else {
+			unput (ch);
+			return ('|');
+		    }
+		} else
+		    return (0);
+
+	    case '*':
+	    case '/':
+		cch = input();
+		if (cch == 0)
+		    return (0);
+
+		if (newtoken) {
+		    if (cch == '=')
+			return ((ch=='*') ? YOP_AOMUL:YOP_AODIV);
+		    else {
+			unput (cch);
+			goto deposit_;
+		    }
+		} else {
+		    if (cch == '=') {
+			unput (cch);
+			unput (ch);
+			goto tokout_;
+		    } else {
+			unput (cch);
+			goto deposit_;
+		    }
+		}
+
+	    /* The following cases are included to force the compiler
+	     * to compile the case as an ASCII jump table.
+	     */
+	    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+	    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+	    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+	    case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+	    case 'y': case 'z':
+	    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+	    case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+	    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+	    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+	    case 'Y': case 'Z':
+		/* fall through to default */
+
+	    default:
+		goto deposit_;
+escape_:	
+		/* Deposit a character preceded by the escape character.
+		 */
+		if (!newarg) {
+		    unput (ch);
+		    ch = '\\';
+		}
+deposit_:
+		/* If the last token returned was a string argument and we
+		 * are starting a second, a delimiter token must be returned
+		 * to delimit the two arguments.  Check for chars not legal
+		 * in an identifier so that we can know whether to return
+		 * CONSTANT or call crackident() which returns IDENT if not
+		 * a reserved keyword.
+		 */
+		if (newtoken) {
+		    identifier = 1;
+		    stringtok  = 1;
+		    setlevel   = 0;
+		    if (newarg) {
+			unput (ch);
+			newarg = 0;
+			return (',');
+		    }
+		}
+
+		yytext[yyleng++] = ch;
+		if (ch == '\\')
+		    yytext[yyleng++] = ch = input();
+		else if (!(isalnum(ch) || ch == '_' || ch == '$' || ch == '.'))
+		    identifier = 0;
+	    }
+	}
+
+tokout_:
+	yytext[yyleng] = '\0';
+
+	if (isdigit (yytext[0]) || yytext[0] == '.' && isdigit (yytext[1])) {
+	    int	token, toklen;
+
+	    token = c_lexnum (yytext, &toklen);
+	    if (token != LEX_NONNUM && toklen == yyleng) {
+		switch (token) {
+		case LEX_REAL:
+		    yylval = addconst (yytext, OT_REAL);
+		    break;
+		default:
+		    yylval = addconst (yytext, OT_INT);
+		    break;
+		}
+		return (Y_CONSTANT);
+	    }
+	}
+
+	if (identifier)
+	    return (crackident (yytext));
+	else {
+	    yylval = addconst (yytext, OT_STRING);
+	    return (Y_CONSTANT);
+	}
+
+eatwhite_:
+	/* Control transfers here after a token has been identified which is
+	 * followed by an associated argument (e.g. > file or < file).  Our
+	 * function is to discard any whitespace following the current token
+	 * in order to make whitespace optional in the input at this point.
+	 * This makes "> file" (for example) equivalent to ">file".
+	 */
+	newarg = 0;
+        while ((ch = input()) && (ch == ' ' || ch == '\t'))
+	    ;
+	if (ch) {
+	    unput (ch);
+	    return (token);
+	} else
+	    return (0);
+}
+
+
+/* LEXINIT -- Initialize the internal state variables of the lexical analyzer,
+ * e.g. when processing is interrupted by an interrupt.
+ */
+lexinit()
+{
+	if (lexmodes() && !lex_cpumodeset (currentask->t_in)) {
+	    lexcol = 0;
+	    newarg = 0;
+	    pbtoken = 0;
+	    lhs = 1;
+	    _lexmodes = 1;
+	} else
+	    _lexmodes = 0;
+}
author	Joseph Hunkeler <jhunkeler@gmail.com>	2015-07-08 20:46:52 -0400
committer	Joseph Hunkeler <jhunkeler@gmail.com>	2015-07-08 20:46:52 -0400
commit	fa080de7afc95aa1c19a6e6fc0e0708ced2eadc4 (patch)
tree	bdda434976bc09c864f2e4fa6f16ba1952b1e555 /pkg/cl/lexicon.c
download	iraf-linux-fa080de7afc95aa1c19a6e6fc0e0708ced2eadc4.tar.gz