From fa080de7afc95aa1c19a6e6fc0e0708ced2eadc4 Mon Sep 17 00:00:00 2001
From: Joseph Hunkeler
Date: Wed, 8 Jul 2015 20:46:52 -0400
Subject: Initial commit

---
 sys/fmtio/lexnum.x | 190 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 190 insertions(+)
 create mode 100644 sys/fmtio/lexnum.x

(limited to 'sys/fmtio/lexnum.x')

diff --git a/sys/fmtio/lexnum.x b/sys/fmtio/lexnum.x
new file mode 100644
index 00000000..f62b0ed2
--- /dev/null
+++ b/sys/fmtio/lexnum.x
@@ -0,0 +1,190 @@
+# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
+
+include         # NOTE(review): file name missing on these three includes --
+include         # presumably stripped during extraction; confirm against the
+include         # repository (IS_WHITE, TO_UPPER, LEX_* are not defined here)
+
+# LEXNUM -- Lexically analyse a character string: determine whether the
+# string begins with a number and, if so, the type of number and how many
+# characters it occupies.  Leading whitespace is skipped, so ip_start is left
+# pointing at the first char of the number (or other token); the length of the
+# number is returned in the third argument (0 if the token is not a number).
+# The scanner is driven by the ACTION table, indexed [char class, state].
+# NOTE - See .doc/lexnum.hlp for a description of the states of the automaton.
+
+define  SZ_STACK        15              # max depth of the saved (ip,state) stack
+
+# Lexical actions.  "Reduce" (a LEX_ code) means exit, returning the lexical
+# type of the token.  "Shift" (a state number 1-NSTATES) means switch to that
+# state.  "Revert" means reduce class "other" in the previous (stacked) state.
+
+define  ACCEPT          -6              # consume char, remain in same state
+define  REVERT          -5              # revert to earlier (stacked) state
+
+
+# Character classes (first index of the action table; lower case is folded)
+
+define  SIGNCHAR        1               # +-
+define  OCTDIG          2               # 0-7
+define  DECDIG          3               # 8-9
+define  HEXDIG          4               # aAcCfF (b, d, e have their own classes)
+define  REALEXP         5               # eEdD
+define  SEXAG           6               # :
+define  FRACTION        7               # .
+define  HEXSUFFIX       8               # xX
+define  OCTSUFFIX       9               # bB
+define  OTHER           10              # any character not classified above
+define  NCC             10              # number of character classes
+
+
+# States of the automaton (second index of the action table)
+
+define  START           1               # initial state
+define  UNM             2               # unop or number
+define  ODH             3               # octal, decimal, hex, or real
+define  DHR             4               # decimal, hex, or real
+define  QRF             5               # maybe real fraction
+define  HEX             6               # hex
+define  QHX             7               # maybe hex or real exponent
+define  QRN             8               # maybe real number
+define  OHN             9               # octal or hex number
+define  RFR             10              # real fraction
+define  RRX             11              # real or real exponent
+define  QRX             12              # maybe real exponent
+define  HRX             13              # hex or real exponent
+define  RNM             14              # real number
+define  REX             15              # real exponent
+define  NSTATES         15              # number of states
+
+
+# LEXNUM -- Determine if the next sequence of characters in the string STR
+# can be interpreted as a number.  Returns the numeric type (LEX_OCTAL,
+# LEX_DECIMAL, LEX_HEX or LEX_REAL), or LEX_NONNUM if it is not a number.
+
+int procedure lexnum (str, ip_start, nchars)
+
+char    str[ARB]                # string to be decoded
+int     ip_start                # starting index in string; moved past whitespace
+int     nchars                  # receives nchars in next token (0 if nonnumeric)
+
+char    ch                      # current character (folded to upper case)
+int     stk_ip[SZ_STACK]        # string index saved at each shift
+int     ip, sp, cc, state, ip_save, toktype, act  # sp=stack ptr, cc=char class
+short   stk_state[SZ_STACK], action[NCC,NSTATES]  # saved states; action table
+int     strncmp()
+include "lexdata.inc"           # presumably the action table data -- not shown
+
+begin
+        while (IS_WHITE (str[ip_start]))        # skip leading whitespace
+            ip_start = ip_start + 1
+        ip = ip_start
+
+        # INDEF is a legal number and is best dealt with as a special case.
+        if (str[ip] == 'I')
+            if (strncmp (str[ip], "INDEF", 5) == 0) {   # NOTE(review): prefix test only
+                nchars = 5
+                return (LEX_REAL)
+            }
+
+        state = START                           # initialization
+        ip_save = ip                            # token start, used for nchars
+        sp = 0                                  # stack is empty
+
+        repeat {                                # one pass per character examined
+            ch = str[ip]
+
+            repeat {                            # determine character class
+                switch (ch) {
+                case '+','-':
+                    cc = SIGNCHAR
+                    break                       # leaves the classification loop
+                case '0','1','2','3','4','5','6','7':
+                    cc = OCTDIG
+                    break
+                case '8','9':
+                    cc = DECDIG
+                    break
+                case 'B':
+                    cc = OCTSUFFIX
+                    break
+                case 'D','E':
+                    cc = REALEXP
+                    break
+                case 'A','C','F':
+                    cc = HEXDIG
+                    break
+                case ':':
+                    cc = SEXAG
+                    break
+                case '.':
+                    cc = FRACTION
+                    break
+                default:
+                    if (IS_LOWER (ch))
+                        ch = TO_UPPER (ch)      # fold case and classify again
+                    else if (ch == 'X') {
+                        cc = HEXSUFFIX
+                        break
+                    } else {
+                        cc = OTHER
+                        break
+                    }
+                }
+            }
+
+#call eprintf ("ip=%2d, sp=%2d, ch=%c, cc=%d, state=%d, action=%d\n")
+#call pargi(ip); call pargi(sp)
+#call pargc(ch); call pargi(cc); call pargi(state)
+#call pargs(action[cc,state])
+
+            # Perform the action indicated by the action table when this
+            # class of character is encountered in the current state.
+
+            act = action[cc,state]
+            if (act == ACCEPT) {
+                ip = ip + 1                     # a simple optimization
+                next                            # nothing is pushed on the stack
+            }
+
+            switch (act) {
+            case REVERT:                        # NOTE(review): assumes sp >= 1 -- confirm
+                repeat {
+                    ip = stk_ip[sp]             # back up to where the shift occurred
+                    state = stk_state[sp]
+                    toktype = action[OTHER,state]       # reduce class "other" there
+                    sp = sp - 1
+                } until (toktype != REVERT || sp <= 0)  # NOTE(review): may end as REVERT
+
+                break                           # token complete; leave the main loop
+
+            case LEX_OCTAL, LEX_DECIMAL, LEX_HEX, LEX_REAL, LEX_NONNUM:
+                toktype = action[cc,state]      # reduce: table entry is the token type
+                if (toktype == LEX_OCTAL && cc == OCTSUFFIX)
+                    ip = ip + 1                 # step over suffix char (counted in nchars)
+                else if (toktype == LEX_HEX && cc == HEXSUFFIX)
+                    ip = ip + 1                 # likewise for the hex suffix
+                break
+
+            default:                            # shift to new state
+                sp = sp + 1                     # push current position and state
+                if (sp > SZ_STACK) {            # stack overflow: report nonnumeric
+                    toktype = LEX_NONNUM
+                    break
+                }
+                stk_ip[sp] = ip
+                stk_state[sp] = state
+
+                ip = ip + 1
+                state = action[cc,state]
+                if (state < 1 || state > NSTATES)       # entry is not a valid state
+                    call error (0, "In LEXNUM: cannot happen")
+            }
+        }
+
+        if (toktype == LEX_NONNUM)
+            nchars = 0
+        else
+            nchars = ip - ip_save               # chars from token start up to ip
+
+        return (toktype)
+end
--
cgit