aboutsummaryrefslogtreecommitdiff
path: root/sys/fmtio/lexnum.x
blob: f62b0ed268039a5b31c3abf247de7671d0b6d1e3 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.

include	<mach.h>
include	<ctype.h>
include	<lexnum.h>

# LEXNUM -- Lexically analyse a character string, determine if string is
# a number, and if so, the type of number, and the number of characters
# in the number.  The ip_start argument is left pointing at the first char
# of the number (or other token), and the number of chars in the number is
# returned as the third argument (0 if not a number).
#
# NOTE - See .doc/lexnum.hlp for a description of the states of the automaton.

define	SZ_STACK	15		# depth of the saved ip/state stacks in lexnum

# Lexical actions.  "Reduce" means exit, returning code identifying lexical
# type of token.  "Shift" means switch to a new state in the automaton.
# "Revert" means reduce class "other" in the previous state.

define	ACCEPT		-6		# consume the char, remain in same state
define	REVERT		-5		# pop saved states until one reduces on OTHER


# Character classes

define	SIGNCHAR	1		# +-
define	OCTDIG		2		# 0-7
define	DECDIG		3		# 8-9
define	HEXDIG		4		# aAcCfF (b is OCTSUFFIX; d,e are REALEXP)
define	REALEXP		5		# eEdD
define	SEXAG		6		# :
define	FRACTION	7		# .
define	HEXSUFFIX	8		# xX
define	OCTSUFFIX	9		# bB
define	OTHER		10		# any character not in the classes above
define	NCC		10		# number of character classes


# States of the automaton

define	START		1		# initial state
define	UNM		2		# unop or number
define	ODH		3		# octal, decimal, hex, or real
define	DHR		4		# decimal, hex, or real
define	QRF		5		# maybe real fraction
define	HEX		6		# hex
define	QHX		7		# maybe hex or real exponent
define	QRN		8		# maybe real number
define	OHN		9		# octal or hex number
define	RFR		10		# real fraction
define	RRX		11		# real or real exponent
define	QRX		12		# maybe real exponent
define	HRX		13		# hex or real exponent
define	RNM		14		# real number
define	REX		15		# real exponent
define	NSTATES		15		# number of states (valid states are 1..NSTATES)


# LEXNUM -- Examine the token that begins at STR[IP_START] (leading whitespace
# is skipped first, advancing IP_START) and decide whether it can be read as a
# number.  The function value is the lexical type taken from the action table,
# LEX_NONNUM meaning the token is not a number.  NCHARS receives the number of
# characters in the number, or zero when LEX_NONNUM is returned.

int procedure lexnum (str, ip_start, nchars)

char	str[ARB]		# string to be decoded
int	ip_start		# starting index in string
int	nchars			# receives nchars in next token

char	ch
int	stk_ip[SZ_STACK]
int	ip, sp, cc, state, ip_save, toktype, act
short	stk_state[SZ_STACK], action[NCC,NSTATES]
int	strncmp()
include	"lexdata.inc"

begin
	# Step over leading whitespace; the caller's index follows along.
	while (IS_WHITE (str[ip_start]))
	    ip_start = ip_start + 1
	ip = ip_start

	# The literal INDEF is handled outside the automaton and is reported
	# as a real number five characters long.
	if (str[ip] == 'I') {
	    if (strncmp (str[ip], "INDEF", 5) == 0) {
		nchars = 5
		return (LEX_REAL)
	    }
	}

	# Start the automaton with an empty stack of saved (ip,state) pairs.
	ip_save = ip
	sp = 0
	state = START

	repeat {
	    # Classify the current character.  Lower case letters are folded
	    # to upper case so that only one set of letters need be tested.

	    ch = str[ip]
	    if (IS_LOWER (ch))
		ch = TO_UPPER (ch)

	    switch (ch) {
	    case '+','-':
		cc = SIGNCHAR
	    case '0','1','2','3','4','5','6','7':
		cc = OCTDIG
	    case '8','9':
		cc = DECDIG
	    case 'A','C','F':
		cc = HEXDIG
	    case 'B':
		cc = OCTSUFFIX
	    case 'D','E':
		cc = REALEXP
	    case 'X':
		cc = HEXSUFFIX
	    case ':':
		cc = SEXAG
	    case '.':
		cc = FRACTION
	    default:
		cc = OTHER
	    }

#call eprintf ("ip=%2d, sp=%2d, ch=%c, cc=%d, state=%d, action=%d\n")
#call pargi(ip); call pargi(sp)
#call pargc(ch); call pargi(cc); call pargi(state)
#call pargs(action[cc,state])

	    # Look up what to do with this class of character in this state.
	    act = action[cc,state]

	    # Staying in the same state is tested first, ahead of the switch.
	    if (act == ACCEPT) {
		ip = ip + 1
		next
	    }

	    switch (act) {
	    case REVERT:
		# Back up through the saved states until one is found whose
		# action for class OTHER is not REVERT, or until the stack
		# is exhausted.  Scanning stops at the restored position.

		repeat {
		    state = stk_state[sp]
		    ip = stk_ip[sp]
		    sp = sp - 1
		    toktype = action[OTHER,state]
		} until (sp <= 0 || toktype != REVERT)

		break

	    case LEX_OCTAL, LEX_DECIMAL, LEX_HEX, LEX_REAL, LEX_NONNUM:
		# Reduce: the action is the token type.  A radix suffix char
		# that terminated an octal or hex number belongs to the token.

		toktype = act
		if ((toktype == LEX_OCTAL && cc == OCTSUFFIX) ||
		    (toktype == LEX_HEX && cc == HEXSUFFIX))
		    ip = ip + 1

		break

	    default:
		# Shift: the action is the new state.  Save the current
		# position and state so that a later REVERT can return here.
		# A full stack ends the scan with a nonnumeric result.

		if (sp >= SZ_STACK) {
		    toktype = LEX_NONNUM
		    break
		}
		sp = sp + 1
		stk_ip[sp] = ip
		stk_state[sp] = state

		state = act
		ip = ip + 1
		if (state > NSTATES || state < 1)
		    call error (0, "In LEXNUM: cannot happen")
	    }
	}

	# Report the token length; a nonnumeric token has length zero.
	if (toktype != LEX_NONNUM)
	    nchars = ip - ip_save
	else
	    nchars = 0

	return (toktype)
end