pkg/bench/xctest/tokens.x


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140

# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.

include	<ctotok.h>

.help tokens
.nf ___________________________________________________________________________
TOKENS -- Break the input up into a series of tokens.  The makeup of the
various tokens is defined by the FMTIO primitive ctotok, which is not very 
sophisticated, and does not claim to recognize the tokens for any particular
language (though it does reasonably well for most modern languages).  Comments
can be deleted if desired, and newlines may be passed on to the output as
tokens.

Comments are delimited by user specified strings.  Only strings which are also
recognized by ctotok() as legal tokens may be used as comment delimiters.
If newline marks the end of a comment, the end_comment string should be given
as "eol".  Examples of acceptable comment conventions are ("#", eol),
("/*", "*/"), ("{", "}"), and ("!", eol).  Fortran style comments ("^{c}",eol)
can be stripped by filtering with match beforehand.

Each token is passed to the output on a separate line.  Multiple newline
tokens are compressed to a single token (a blank line).  If newline is not
desired as an output token, it is considered whitespace and serves only to
delimit tokens.
.endhelp ______________________________________________________________________

define	SZ_COMDELIMSTR	20		# Max length of a comment delimiter string.

# T_TOKENS -- Break each input file up into tokens and print one token per
# line on the standard output.  The task reads the following CL parameters:
#
#	ignore_comments		(bool)	  strip comments from the token stream
#	begin_comment		(string)  token which opens a comment
#	end_comment		(string)  token which closes a comment, or "eol"
#	newlines		(bool)	  pass newline on as an output token
#	files			(list)	  input file template
#
# The comment delimiters are only read if ignore_comments is set.  Runs of
# newline tokens are compressed to a single blank output line.  String tokens
# are printed enclosed in double quotes, with each character rendered by
# ctocc().

procedure t_tokens()

bool	ignore_comments, comment_delimiter_is_eol
bool	in_comment, pass_newlines
char	begin_comment[SZ_COMDELIMSTR], end_comment[SZ_COMDELIMSTR]
int	fd, list, token, last_token, last_nscan, nleft
pointer	sp, fname, tokbuf, outstr, ip, op

bool	streq(), clgetb()
int	clpopni(), clgfil(), fscan(), nscan(), open(), ctocc()

begin
	call smark (sp)
	call salloc (fname, SZ_FNAME, TY_CHAR)
	call salloc (tokbuf, SZ_LINE, TY_CHAR)
	call salloc (outstr, SZ_LINE, TY_CHAR)

	# If comments are to be ignored, get comment delimiters.
	ignore_comments = clgetb ("ignore_comments")
	if (ignore_comments) {
	    call clgstr ("begin_comment", begin_comment, SZ_COMDELIMSTR)
	    call clgstr ("end_comment", end_comment, SZ_COMDELIMSTR)
	    comment_delimiter_is_eol = streq (end_comment, "eol")
	} else {
	    # Comment stripping is disabled.  Leave every comment related
	    # variable in a defined state; the begin_comment test below is
	    # additionally guarded by ignore_comments, so the null strings
	    # set here can never be matched against a token.
	    begin_comment[1] = EOS
	    end_comment[1] = EOS
	    comment_delimiter_is_eol = false
	}

	# Is newline a token?
	pass_newlines = clgetb ("newlines")

	# Merge all input files into a single stream of tokens on the standard
	# output.
	list = clpopni ("files")

	while (clgfil (list, Memc[fname], SZ_FNAME) != EOF) {
	    fd = open (Memc[fname], READ_ONLY, TEXT_FILE)

	    # Every file starts outside of a comment and with no previous
	    # token.  A comment left open at the end of one file therefore
	    # does not swallow the start of the next file.
	    last_token = NULL
	    in_comment = false

	    while (fscan (fd) != EOF) {
		# Break input line into a stream of tokens.
		repeat {
		    last_nscan = nscan()
		    call gargtok (token, Memc[tokbuf], SZ_LINE)

		    # If "nscan" did not increment (actually impossible with
		    # gargtok) the line has been exhausted.
		    if (nscan() == last_nscan)
			break

		    # If busy ignoring a comment, check for delimiter.
		    if (in_comment) {
			if (comment_delimiter_is_eol &&
			(token == TOK_NEWLINE || token == TOK_EOS)) {
			    # End of line terminates the comment.  The newline
			    # itself is still passed on if requested.
			    in_comment = false
			    if (pass_newlines && last_token != TOK_NEWLINE) {
				call printf ("\n")
				last_token = TOK_NEWLINE
			    }
			    break
			} else if (streq (Memc[tokbuf], end_comment)) {
			    in_comment = false
			    next
			} else if (token == TOK_EOS) {
			    # The line ran out inside a multi-line comment.
			    # Go read the next line, still in comment mode,
			    # rather than relying on the nscan test above to
			    # terminate the loop.
			    break
			} else
			    next
		    }

		    # If we get here, we are not processing a comment.

		    if (token == TOK_NEWLINE) {
			if (pass_newlines && last_token != TOK_NEWLINE)
			    call printf ("\n")
			last_token = TOK_NEWLINE
			break

		    } else if (token == TOK_EOS) {
			# EOS is not counted as a token (do not set last_token,
			# do not generate any output).
			break

		    } else if (ignore_comments &&
		    streq (Memc[tokbuf], begin_comment)) {
			in_comment = true
			# Do not change last_token, since comment token
			# is to be ignored.
			next

		    } else if (token == TOK_STRING) {
			# Convert control characters into printable
			# sequences before printing string token.  The output
			# buffer is terminated up front so that a token with
			# no text prints as "", and each ctocc call is limited
			# to the space remaining in outstr so that expanded
			# sequences cannot overrun the buffer.
			op = outstr
			Memc[op] = EOS
			for (ip=tokbuf;  Memc[ip] != EOS;  ip=ip+1) {
			    nleft = SZ_LINE - (op - outstr)
			    if (nleft <= 0)
				break
			    op = op + ctocc (Memc[ip], Memc[op], nleft)
			}
			call printf ("\"%s\"\n")
			    call pargstr (Memc[outstr])

		    } else {				# most tokens
			call printf ("%s\n")
			    call pargstr (Memc[tokbuf])
		    }

		    last_token = token
		}
	    }
	    call close (fd)
	}

	call clpcls (list)
	call sfree (sp)
end