1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
|
# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
include <mach.h>
include <ctype.h>
include <lexnum.h>
# LEXNUM -- Lexically analyse a character string, determine if string is
# a number, and if so, the type of number, and the number of characters
# in the number. The ip_start argument is left pointing at the first char
# of the number (or other token), and the number of chars in the number is
# returned as the third argument (0 if not a number).
#
# NOTE - See .doc/lexnum.hlp for a description of the states of the automaton.
# Depth of the backup stack (stk_ip / stk_state in LEXNUM). One entry is
# pushed for every shift; if a token would need more than this many shifts
# LEXNUM gives up and returns LEX_NONNUM.
define SZ_STACK 15
# Lexical actions. "Reduce" means exit, returning code identifying lexical
# type of token. "Shift" means switch to a new state in the automaton.
# "Revert" means reduce class "other" in the previous state.
#
# An entry of the action table is one of: ACCEPT, REVERT, one of the LEX_xxx
# token codes (a reduce), or a state number in the range 1..NSTATES (a shift
# to that state).
define ACCEPT -6 # remain in same state
define REVERT -5 # revert to earlier state
# Character classes. These are the values assigned to "cc" in LEXNUM and are
# used as the first subscript of the action table. Lower case letters are
# folded to upper case before being classified.
define SIGNCHAR 1 # +-
define OCTDIG 2 # 0-7
define DECDIG 3 # 8-9
define HEXDIG 4 # a-fA-F
define REALEXP 5 # eEdD
define SEXAG 6 # :
define FRACTION 7 # .
define HEXSUFFIX 8 # xX
define OCTSUFFIX 9 # bB
define OTHER 10 # invalid character
# Number of character classes (first dimension of the action table).
define NCC 10
# States of the automaton. These are the values taken by "state" in LEXNUM
# and are used as the second subscript of the action table.
define START 1 # initial state
define UNM 2 # unop or number
define ODH 3 # octal, decimal, hex, or real
define DHR 4 # decimal, hex, or real
define QRF 5 # maybe real fraction
define HEX 6 # hex
define QHX 7 # maybe hex or real exponent
define QRN 8 # maybe real number
define OHN 9 # octal or hex number
define RFR 10 # real fraction
define RRX 11 # real or real exponent
define QRX 12 # maybe real exponent
define HRX 13 # hex or real exponent
define RNM 14 # real number
define REX 15 # real exponent
# Number of states (second dimension of the action table).
define NSTATES 15
# LEXNUM -- Determine if the next sequence of characters in the string STR
# can be interpreted as a number. Return the numeric type as the function
# value or LEX_NONNUM if the string is not a number.
#
# Arguments:
#   str       the string to be scanned
#   ip_start  on input, the index at which to start scanning; on output it
#             has been advanced past any leading whitespace
#   nchars    on output, the number of characters in the token, counted from
#             the updated ip_start; 0 if LEX_NONNUM is returned
#
# The scan is driven by the table action[cc,state], where cc is the class of
# the current character and state is the current automaton state. Each time
# the automaton shifts to a new state the current position and state are
# pushed on a small stack so that a later REVERT action can back up to them.
int procedure lexnum (str, ip_start, nchars)
char str[ARB] # string to be decoded
int ip_start # starting index in string
int nchars # receives nchars in next token
char ch
# Backup stack: stk_ip[i] and stk_state[i] hold the string index and the
# automaton state that were current when the i-th shift was made.
int stk_ip[SZ_STACK]
int ip, sp, cc, state, ip_save, toktype, act
short stk_state[SZ_STACK], action[NCC,NSTATES]
int strncmp()
# NOTE(review): the action table is declared above but never assigned in this
# procedure; presumably lexdata.inc supplies its contents -- confirm.
include "lexdata.inc"
begin
# Skip leading whitespace. This modifies the caller's ip_start.
while (IS_WHITE (str[ip_start]))
ip_start = ip_start + 1
ip = ip_start
# INDEF is a legal number and is best dealt with as a special case.
# NOTE(review): only the first 5 characters are compared and the character
# that follows is not examined, so a longer word beginning with "INDEF"
# would apparently also be reported as a 5 character real -- confirm that
# this is intended.
if (str[ip] == 'I')
if (strncmp (str[ip], "INDEF", 5) == 0) {
nchars = 5
return (LEX_REAL)
}
state = START # initialization
ip_save = ip
sp = 0
# Main loop: one pass per character examined. The loop is left via the
# "break" statements in the action switch below, with toktype set.
repeat {
ch = str[ip]
# Classify the character. Each "break" leaves this inner repeat with cc
# set. A lower case letter is converted to upper case and the switch is
# executed a second time, so letters are classified without regard to case.
repeat { # determine character class
switch (ch) {
case '+','-':
cc = SIGNCHAR
break
case '0','1','2','3','4','5','6','7':
cc = OCTDIG
break
case '8','9':
cc = DECDIG
break
case 'B':
cc = OCTSUFFIX
break
case 'D','E':
cc = REALEXP
break
case 'A','C','F':
cc = HEXDIG
break
case ':':
cc = SEXAG
break
case '.':
cc = FRACTION
break
default:
if (IS_LOWER (ch))
ch = TO_UPPER (ch) # and repeat
else if (ch == 'X') {
cc = HEXSUFFIX
break
} else {
cc = OTHER
break
}
}
}
# Debug trace, disabled.
#call eprintf ("ip=%2d, sp=%2d, ch=%c, cc=%d, state=%d, action=%d\n")
#call pargi(ip); call pargi(sp)
#call pargc(ch); call pargi(cc); call pargi(state)
#call pargs(action[cc,state])
# Perform the action indicated by the action table when this
# class of character is encountered in the current state.
act = action[cc,state]
# ACCEPT consumes the character without changing state and without pushing
# anything on the backup stack.
if (act == ACCEPT) {
ip = ip + 1 # a simple optimization
next
}
switch (act) {
case REVERT:
# Back up: pop saved (ip, state) pairs, taking as the token type the
# action for class OTHER in each restored state, until that action is
# something other than REVERT or the stack has been emptied.
# NOTE(review): if REVERT were reached with sp == 0 the first pass would
# read element 0 of the stacks, and if the stack empties while toktype is
# still REVERT then REVERT itself is returned as the token type.
# Presumably the table in lexdata.inc never allows either -- confirm.
repeat {
ip = stk_ip[sp]
state = stk_state[sp]
toktype = action[OTHER,state]
sp = sp - 1
} until (toktype != REVERT || sp <= 0)
break
case LEX_OCTAL, LEX_DECIMAL, LEX_HEX, LEX_REAL, LEX_NONNUM:
# Reduce: the table entry is the token type. The current character is
# not part of the token unless it is the suffix that identified an
# octal (B) or hex (X) number, in which case it is counted as well.
toktype = action[cc,state]
if (toktype == LEX_OCTAL && cc == OCTSUFFIX)
ip = ip + 1 # discard suffix char
else if (toktype == LEX_HEX && cc == HEXSUFFIX)
ip = ip + 1
break
default: # shift to new state
# Save the current position and state for a possible REVERT, consume the
# character, and enter the state given by the table entry.
sp = sp + 1
# Backup stack full: give up and report the token as not a number.
if (sp > SZ_STACK) {
toktype = LEX_NONNUM
break
}
stk_ip[sp] = ip
stk_state[sp] = state
ip = ip + 1
state = action[cc,state]
# Sanity check on the table entry just used as the new state.
if (state < 1 || state > NSTATES)
call error (0, "In LEXNUM: cannot happen")
}
}
# ip now indexes the first character after the token.
if (toktype == LEX_NONNUM)
nchars = 0
else
nchars = ip - ip_save
return (toktype)
end
|