aboutsummaryrefslogtreecommitdiff
path: root/sys/qpoe/qpgettok.x
blob: feb8d7801a88cecd1f5eac91eba6b344b05e3cb7 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.

include	<syserr.h>
include	<error.h>
include	<ctype.h>
include	<fset.h>
include	"qpoe.h"

.help gettok
.nf --------------------------------------------------------------------------
GETTOK -- Lexical input routines for QPOE.  Used to return tokens from input
text; this is where all macro expansion and file expansion takes place.

	       gt = qp_opentext (qp, text)
	      token = qp_gettok (gt, tokbuf, maxch)
		    qp_ungettok (gt, tokbuf)
	      token = qp_rawtok (gt, tokbuf, maxch)
	     token = qp_nexttok (gt)
		   qp_closetext (gt)

Access to the package is gained by opening a text string with QP_OPENTEXT.
This returns a descriptor which is passed to QP_GETTOK to read successive
tokens, which may come from the input text string or from any macros,
include files, etc., referenced in the text or in any substituted text.
QP_UNGETTOK pushes a token back into the QP_GETTOK input stream, to be
returned in the next QP_GETTOK call (following macro expansion).

QP_RAWTOK returns the next physical token from an input stream (without
macro expansion), and QP_NEXTTOK returns the type of the next *physical*
token (no macro expansion) without actually fetching it (for look ahead
decision making).

The tokens that can be returned are as follows:

	TOK_IDENTIFIER		[a-zA-Z][a-zA-Z0-9_]*
	TOK_NUMBER		[0-9][0-9a-zA-Z.]*(e|E)?(+|-)?[0-9]*
	TOK_STRING		if "abc" or 'abc', the abc
	'c'			other characters, e.g., =+-*/,;:()[] etc
	EOF			at end of input

Macro replacement syntax:

	macro			push macro with null arglist
	macro(arg,arg,...)	push macro with argument substitution
	@file			push contents of file
	@file(arg,arg,...)	push file with argument substitution
	`cmd`			substitute output of CL command "cmd"

where
	macro			is an identifier, the name of a global macro
				or a datafile local macro (parameter)

In all cases, occurences of $N in the replacement text are replaced by the
macro arguments if any, and macros are recursively expanded.  Whitespace,
including newline, equates to a single space, as does EOF (hence always
delimits tokens).  Comments (# to end of line) are ignored.  All identifiers
in scanned text are checked to see if they are references to predefined global
or local (datafile) macros.

A global macro is a symbol defined globally for QPOE, in effect for all poefile
accesses (see qpmacro.x).  A local macro is a macro defined as a string
parameter of type TY_MACRO in the poefile header (and hence affecting only
that one datafile).
.endhelp ---------------------------------------------------------------------

# General definitions.
define	MAX_LEVELS	20		# max include file nesting
define	MAX_ARGS	9		# max arguments to a macro
define	SZ_CMD		80		# `cmd`
define	SZ_IBUF		8192		# buffer for macro replacement
define	SZ_OBUF		8192		# buffer for macro replacement
define	SZ_ARGBUF	256		# argument list to a macro

# The gettok descriptor.
define	LEN_GTDES	45
define	GT_QP		Memi[$1]	# backpointer to QPOE descriptor
define	GT_FD		Memi[$1+1]	# current input stream
define	GT_NEXTCH	Memi[$1+2]	# lookahead character
define	GT_FTEMP	Memi[$1+3]	# file on stream is a temp file
define	GT_LEVEL	Memi[$1+4]	# current nesting level
define	GT_SVFD		Memi[$1+5+$2-1]	# stacked file descriptors
define	GT_SVFTEMP	Memi[$1+25+$2-1]# stacked ftemp flags


# QP_OPENTEXT -- Open the QP_GETTOK descriptor.  The descriptor is initially
# opened on the user supplied string buffer (which is opened as a file and
# which must remain intact while token input is in progress), but include file
# processing etc. may cause arbitrary nesting of file descriptors.

pointer procedure qp_opentext (qp, text)

pointer	qp			#I QPOE descriptor
char	text[ARB]		#I input text to be scanned

pointer	gt
int	sz_pbbuf
int	stropen(), strlen()
errchk	stropen, calloc

begin
	call calloc (gt, LEN_GTDES, TY_STRUCT)

	GT_QP(gt) = qp
	GT_FD(gt) = stropen (text, strlen(text), READ_ONLY)

	if (qp == NULL)
	    sz_pbbuf = DEF_MAXPUSHBACK
	else
	    sz_pbbuf = QP_SZPBBUF(qp)
	call fseti (GT_FD(gt), F_PBBSIZE, sz_pbbuf)

	return (gt)
end


# QP_GETTOK -- Return the next token from the input stream.  The token ID
# (a predefined integer code or the character value) is returned as the
# function value.  The text of the token is returned as an output argument.
# Any macro references, file includes, etc., are performed in the process
# of scanning the input stream, hence only fully resolved tokens are output.

int procedure qp_gettok (gt, tokbuf, maxch)

pointer	gt			#I gettok descriptor
char	tokbuf[maxch]		#O receives the text of the token
int	maxch			#I max chars out

pointer	sp, bp, qp, cmd, ibuf, obuf, argbuf, fname, sym, textp
int	fd, token, level, nargs, nchars, i_fd, o_fd, ftemp

bool	streq()
pointer	qp_gmsym()
int	strmac(), open(), stropen()
int	qp_rawtok(), qp_nexttok(), qp_arglist()
errchk	qp_rawtok,close,ungetci,ungetline,qp_arglist,clcmdw,stropen,syserr
define	pushfile_ 91


begin
	call smark (sp)

	# Allocate some buffer space.
	nchars = SZ_CMD + SZ_IBUF + SZ_OBUF + SZ_ARGBUF + SZ_FNAME + 5
	call salloc (bp, nchars, TY_CHAR)

	cmd = bp
	ibuf = cmd + SZ_CMD + 1
	obuf = ibuf + SZ_IBUF + 1
	argbuf = obuf + SZ_OBUF + 1
	fname = argbuf + SZ_ARGBUF + 1

	qp = GT_QP(gt)

	# Read raw tokens and push back macro or include file text until we
	# get a fully resolved token.

	repeat {
	    fd = GT_FD(gt)

	    # Get a raw token.
	    token = qp_rawtok (gt, tokbuf, maxch)

	    # Process special tokens.
	    switch (token) {
	    case EOF:
		# EOF has been reached on the current stream.
		level = GT_LEVEL(gt)
		if (GT_FTEMP(gt) == YES) {
		    call fstats (fd, F_FILENAME, Memc[fname], SZ_FNAME)
		    if (level > 0)
			call close (fd)
		    iferr (call delete (Memc[fname]))
			call erract (EA_WARN)
		} else if (level > 0)
		    call close (fd)

		if (level > 0) {
		    # Restore previous stream.
		    GT_FD(gt)     = GT_SVFD(gt,level)
		    GT_FTEMP(gt)  = GT_SVFTEMP(gt,level)
		    GT_LEVEL(gt)  = level - 1
		    GT_NEXTCH(gt) = NULL
		} else {
		    # Return EOF token to caller.
		    call strcpy ("EOF", tokbuf, maxch)
		    break
		}

	    case TOK_IDENTIFIER:
		# Lookup the identifier in the symbol table.
		sym = NULL
		if (qp != NULL)
		    sym = qp_gmsym (qp, tokbuf, textp)

		# Process a defined macro.
		if (sym != NULL) {
		    # If macro does not have any arguments, merely push back
		    # the replacement text.

		    if (and (S_FLAGS(sym), SF_MACARGS) == 0) {
			if (GT_NEXTCH(gt) > 0) {
			    call ungetci (fd, GT_NEXTCH(gt))
			    GT_NEXTCH(gt) = 0
			}
			call ungetline (fd, Memc[textp])
			next
		    }

		    # Extract argument list, if any, perform argument
		    # substitution on the macro, and push back the edited
		    # text to be rescanned.

		    if (qp_nexttok(gt) == '(') {
			nargs = qp_arglist (gt, Memc[argbuf], SZ_ARGBUF)

			# Pushback the text of a macro with arg substitution.
			nchars = strmac (Memc[textp], Memc[argbuf],
			    Memc[obuf], SZ_OBUF)
			if (GT_NEXTCH(gt) > 0) {
			    call ungetci (fd, GT_NEXTCH(gt))
			    GT_NEXTCH(gt) = 0
			}
			call ungetline (fd, Memc[obuf])
			next

		    } else {
			call eprintf ("macro `%s' called with no arguments\n")
			    call pargstr (tokbuf)
		    }
		}

		# Check for the builtin symbol $DFN, the datafile name.
		if (tokbuf[1] == '$') {
		    if (streq (tokbuf, "$DFN")) {
			call strcpy (QP_DFNAME(qp), tokbuf, maxch)
			token = TOK_STRING
			break
		    }
		}

		# Return a regular identifier.
		break

	    case TOK_COMMAND:
		# Send a command to the CL and push back the output.

		# Execute the command, spooling the output in a temp file.
		call mktemp ("tmp$co", Memc[fname], SZ_FNAME)
		call sprintf (Memc[cmd], SZ_LINE, "%s > %s")
		    call pargstr (tokbuf)
		    call pargstr (Memc[fname])
		call clcmdw (Memc[cmd])

		# Open the output file as input text.
		call strcpy (Memc[fname], tokbuf, maxch)
		nargs = 0
		ftemp = YES
		goto pushfile_

	    case '@':
		token = qp_rawtok (gt, tokbuf, maxch)
		if (token != TOK_IDENTIFIER && token != TOK_STRING) {
		    call eprintf ("expected a filename after the `@'\n")
		    next
		} else {
		    nargs = 0
		    if (qp_nexttok(gt) == '(')		# )
			nargs = qp_arglist (gt, Memc[argbuf], SZ_ARGBUF)
		    ftemp = NO
		}
pushfile_
		# Attempt to open the file; first try the given name, then
		# if that doesn't work, try adding the macro file extension.

		iferr (i_fd = open (tokbuf, READ_ONLY, TEXT_FILE)) {
		    call qp_mkfname (tokbuf,
			QPOE_MACROEXTN, Memc[fname], SZ_FNAME)
		    iferr (i_fd = open (Memc[fname],READ_ONLY,TEXT_FILE)) {
			call eprintf ("cannot open `%s'\n")
			    call pargstr (tokbuf)
			next
		    }
		}

		if (qp != NULL)
		    call fseti (i_fd, F_PBBSIZE, QP_SZPBBUF(qp))
		else
		    call fseti (i_fd, F_PBBSIZE, DEF_MAXPUSHBACK)

		# Cancel lookahead.
		if (GT_NEXTCH(gt) > 0) {
		    call ungetci (fd, GT_NEXTCH(gt))
		    GT_NEXTCH(gt) = 0
		}

		# If the macro was called with a nonnull argument list,
		# attempt to perform argument substitution on the file
		# contents.  Otherwise merely push the fd.

		if (nargs > 0) {
		    # Pushback file contents with argument substitution.
		    o_fd = stropen (Memc[ibuf], SZ_IBUF, NEW_FILE)

		    call fcopyo (i_fd, o_fd)
		    nchars = strmac (Memc[ibuf],Memc[argbuf],Memc[obuf],SZ_OBUF)
		    call ungetline (fd, Memc[obuf])

		    call close (o_fd)
		    call close (i_fd)

		} else {
		    # Push a new input stream.
		    level = GT_LEVEL(gt) + 1
		    if (level > MAX_LEVELS)
			call syserr (SYS_QPMRECUR)

		    GT_SVFD(gt,level) = GT_FD(gt)
		    GT_SVFTEMP(gt,level) = GT_FTEMP(gt)
		    GT_LEVEL(gt) = level

		    fd = i_fd
		    GT_FD(gt) = fd
		    GT_FTEMP(gt) = ftemp
		}

	    default:
		break
	    }
	}

	if (qp != NULL)
	    if (QP_DEBUG(qp) > 4) {
		call eprintf ("token=%d(%o), `%s'\n")
		    call pargi (token)
		    call pargi (max(0,token))
		    if (IS_PRINT(tokbuf[1]))
			call pargstr (tokbuf)
		    else
			call pargstr ("")
	    }

	call sfree (sp)
	return (token)
end


# QP_UNGETTOK -- Push a token back into the QP_GETTOK input stream, to be
# returned as the next token by QP_GETTOK.

procedure qp_ungettok (gt, tokbuf)

pointer	gt			#I gettok descriptor
char	tokbuf[ARB]		#I text of token

int	fd
pointer	qp
errchk	ungetci

begin
	fd = GT_FD(gt)
	qp = GT_QP(gt)

	if (qp != NULL)
	    if (QP_DEBUG(qp) > 4) {
		call eprintf ("unget token `%s'\n")
		    call pargstr (tokbuf)
	    }

	# Cancel lookahead.
	if (GT_NEXTCH(gt) > 0) {
	    call ungetci (fd, GT_NEXTCH(gt))
	    GT_NEXTCH(gt) = 0
	}

	# First push back a space to ensure that the token is recognized
	# when the input is rescanned.

	call ungetci (fd, ' ')

	# Now push the token text.
	call ungetline (fd, tokbuf)
end
 

# QP_RAWTOK -- Get a raw token from the input stream, without performing any
# macro expansion or file inclusion.  The text of the token in returned in
# tokbuf, and the token type is returened as the function value.

int procedure qp_rawtok (gt, outstr, maxch)

pointer	gt			#I gettok descriptor
char	outstr[maxch]		#O receives text of token.
int	maxch			#I max chars out

int	token, delim, fd, ch, op
define	again_ 91
int	getci()

begin
	fd = GT_FD(gt)
again_
	# Get lookahead char if we don't already have one.
	ch = GT_NEXTCH(gt)
	GT_NEXTCH(gt) = NULL
	if (ch <= 0 || IS_WHITE(ch) || ch == '\n') {
	    while (getci (fd, ch) != EOF)
		if (!(IS_WHITE(ch) || ch == '\n'))
		    break
	}

	# Output the first character.
	op = 1
	if (ch != EOF && ch != '"' && ch != '\'' && ch != '`') {
	    outstr[op] = ch
	    op = op + 1
	}

	# Accumulate token.  Some of the token recognition logic used here
	# (especially for numbers) is crude, but it is not clear that rigour
	# is justified for this application.

	if (ch == EOF) {
	    call strcpy ("EOF", outstr, maxch)
	    token = EOF

	} else if (ch == '#') {
	    # Ignore a comment.
	    while (getci (fd, ch) != '\n')
		if (ch == EOF)
		    break
	    goto again_

	} else if (IS_ALPHA(ch) || ch == '_' || ch == '$' || ch == '.') {
	    # Identifier.
	    token = TOK_IDENTIFIER
	    while (getci (fd, ch) != EOF)
		if (IS_ALNUM(ch) || ch == '_' || ch == '$' || ch == '.') {
		    outstr[op] = ch
		    op = min (maxch, op+1)
		} else
		    break

	} else if (IS_DIGIT(ch)) {
	    # Number.
	    token = TOK_NUMBER

	    # Get number.
	    while (getci (fd, ch) != EOF)
		if (IS_ALNUM(ch) || ch == '.') {
		    outstr[op] = ch
		    op = min (maxch, op+1)
		} else
		    break

	    # Get exponent if any.
	    if (ch == 'E' || ch == 'e') {
		outstr[op] = ch
		op = min (maxch, op+1)
		while (getci (fd, ch) != EOF)
		    if (IS_DIGIT(ch) || ch == '+' || ch == '-') {
			outstr[op] = ch
			op = min (maxch, op+1)
		    } else
			break
	    }

	} else if (ch == '"' || ch == '\'' || ch == '`') {
	    # Quoted string or command.

	    if (ch == '`')
		token = TOK_COMMAND
	    else
		token = TOK_STRING

	    delim = ch
	    while (getci (fd, ch) != EOF)
		if (ch==delim && (op>1 && outstr[op-1] != '\\') || ch == '\n')
		    break
		else {
		    outstr[op] = ch
		    op = min (maxch, op+1)
		}
	    ch = getci (fd, ch)

	} else if (ch == '+') {
	    # May be the += operator.
	    if (getci (fd, ch) != EOF)
		if (ch == '=') {
		    token = TOK_PLUSEQUALS
		    outstr[op] = ch
		    op = op + 1
		    ch = getci (fd, ch)
		} else
		    token = '+'

	} else if (ch == ':') {
	    # May be the := operator.
	    if (getci (fd, ch) != EOF)
		if (ch == '=') {
		    token = TOK_COLONEQUALS
		    outstr[op] = ch
		    op = op + 1
		    ch = getci (fd, ch)
		} else
		    token = ':'

	} else {
	    # Other characters.
	    token = ch
	    ch = getci (fd, ch)
	}

	# Process the lookahead character.
	if (IS_WHITE(ch) || ch == '\n') {
	    repeat {
		ch = getci (fd, ch)
	    } until (!(IS_WHITE(ch) || ch == '\n'))
	}

	if (ch != EOF)
	    GT_NEXTCH(gt) = ch

	outstr[op] = EOS
	return (token)
end


# QP_NEXTTOK -- Determine the type of the next raw token in the input stream,
# without actually fetching the token.  TOK_PLUSEQUALS is not recognized at
# this level.  Note that this is at the same level as QP_RAWTOK, i.e., no
# macro expansion is performed, and the lookahead token is that which would
# be returned by the next qp_rawtok, which is not necessarily what qp_gettok
# would return after macro replacement.

int procedure qp_nexttok (gt)

pointer	gt			#I gettok descriptor

pointer	qp
int	token, fd, ch
int	getci()

begin
	fd = GT_FD(gt)
	qp = GT_QP(gt)

	# Get lookahead char if we don't already have one.
	ch = GT_NEXTCH(gt)
	if (ch <= 0 || IS_WHITE(ch) || ch == '\n')
	    while (getci (fd, ch) != EOF)
		if (!(IS_WHITE(ch) || ch == '\n'))
		    break

	if (ch == EOF)
	    token = EOF
	else if (IS_ALPHA(ch))
	    token = TOK_IDENTIFIER
	else if (IS_DIGIT(ch))
	    token = TOK_NUMBER
	else if (ch == '"' || ch == '\'') 
	    token = TOK_STRING
	else if (ch == '`')
	    token = TOK_COMMAND
	else
	    token = ch

	if (qp != NULL)
	    if (QP_DEBUG(qp) > 4) {
		call eprintf ("nexttok=%d(%o) `%c'\n")
		    call pargi (token)
		    call pargi (max(0,token))
		    if (IS_PRINT(ch))
			call pargi (ch)
		    else
			call pargi (0)
	    }

	return (token)
end


# QP_CLOSETEXT -- Close the gettok descriptor and any files opened thereon.

procedure qp_closetext (gt)

pointer	gt			#I gettok descriptor

int	level, fd
pointer	sp, fname

begin
	call smark (sp)
	call salloc (fname, SZ_FNAME, TY_CHAR)

	for (level=GT_LEVEL(gt);  level >= 0;  level=level-1) {
	    fd = GT_FD(gt)
	    if (GT_FTEMP(gt) == YES) {
		call fstats (fd, F_FILENAME, Memc[fname], SZ_FNAME)
		call close (fd)
		iferr (call delete (Memc[fname]))
		    call erract (EA_WARN)
	    } else
		call close (fd)

	    if (level > 0) {
		GT_FD(gt)    = GT_SVFD(gt,level)
		GT_FTEMP(gt) = GT_SVFTEMP(gt,level)
	    }
	}

	call mfree (gt, TY_STRUCT)
	call sfree (sp)
end


# QP_ARGLIST -- Extract a paren and comma delimited argument list to be used
# for substitution into a macro replacement string.  Since the result will be
# pushed back and rescanned, we do not have to perform macro substitution on
# the argument list at this level.

int procedure qp_arglist (gt, argbuf, maxch)

pointer	gt			#I gettok descriptor
char	argbuf[maxch]		#O receives parsed arguments
int	maxch			#I max chars out

int	level, quote, nargs, op, ch, fd
int	getci()

begin
	fd = GT_FD(gt)

	# Get lookahead char if we don't already have one.
	ch = GT_NEXTCH(gt)
	if (ch <= 0 || IS_WHITE(ch) || ch == '\n')
	    while (getci (fd, ch) != EOF)
		if (!(IS_WHITE(ch) || ch == '\n'))
		    break

	quote = 0
	level = 1
	nargs = 0
	op = 1

	if (ch == '(') {
	    while (getci (fd, ch) != EOF) {
		if (ch == '"' || ch == '\'') {
		    if (quote == 0)
			quote = ch
		    else if (quote == ch)
			quote = 0

		} else if (ch == '(' && quote == 0) {
		    level = level + 1
		} else if (ch == ')' && quote == 0) {
		    level = level - 1
		    if (level <= 0) {
			if (op > 1 && argbuf[op-1] != EOS)
			    nargs = nargs + 1
			break
		    }

		} else if (ch == ',' && level == 1 && quote == 0) {
		    ch = EOS
		    nargs = nargs + 1
		} else if (ch == '\n') {
		    ch = ' '
		} else if (ch == '\\' && quote == 0) {
		    ch = getci (fd, ch)
		    next
		} else if (ch == '#' && quote == 0) {
		    while (getci (fd, ch) != EOF)
			if (ch == '\n')
			    break
		    next
		}

		argbuf[op] = ch
		op = min (maxch, op + 1)
	    }

	    GT_NEXTCH(gt) = NULL
	}

	argbuf[op] = EOS
	return (nargs)
end