aboutsummaryrefslogtreecommitdiff
path: root/pkg/tbtables/tbbwrd.x
blob: 024fe2421dcd439c128de865b440a2312451e6fc (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
include <ctype.h>		# for IS_DIGIT

define	LEFT_MARGIN	0	# extra space at left of first column

# tbbwrd -- read a word from the input buffer
# This routine extracts the next word from the input buffer, returning
# the word itself and the number of characters that it contains.
# The function value is the number of characters converted, as returned
# by ctowrd; it is zero if no word could be extracted.  If the value is
# a string (i.e. not numeric), then trailing blanks will be truncated.
# If the value is a single blank, however, that blank will not be deleted.
#
# If the word is numeric, PREC is the number of digits of precision
# in the word; for HH:MM:SS.d or HH:MM.d format PREC is the number of
# digits after the decimal point.  The idea is that the values of WIDTH
# and PREC returned by this routine can go directly into a format code.
#
# The format code FCODE and precision PREC are really only used if the
# data type is double.

# Phil Hodge,  3-Mar-1992  Subroutine created.
# Phil Hodge,  7-Aug-1992  Include checks on string beginning with number;
#			add fcode to the calling sequence.
# Phil Hodge, 10-Sep-1992  Set datatype to double if word is INDEF.
# Phil Hodge,  7-Jun-1994  Set type to char if more than one decimal point.
# Phil Hodge, 27-Jul-1994  Change LEFT_MARGIN from 3 to 0 (no longer needed).

int procedure tbbwrd (buf, ip, word, maxch, width, prec, datatype, fcode)

char	buf[ARB]		# i: buffer containing line from file
int	ip			# io: starting location in buffer
char	word[ARB]		# o: word extracted from buffer
int	maxch			# i: max size of word
int	width			# o: width of column
int	prec			# o: digits of precision in this word
int	datatype		# o: TY_DOUBLE, TY_INT, or TY_CHAR
int	fcode			# o: format code for print format
#--
char	cval			# one character in the word
int	ip_start		# ip before calling ctowrd
int	word_width		# width of extracted word (value of ctowrd)
int	i			# loop index
int	num_colon		# number of ':' found in word
bool	quote			# true if value begins with ' or "
bool	exponent		# true if there's an exponent in the word
bool	dec_point		# true if there's a decimal point in the word
int	ctowrd(), strlen(), strncmp()
bool	streq()

# Statement labels:  chartype_ is the exit from the numeric scan once the
# word is known to be a string; finished_ skips all further classification.
define	chartype_ 91
define	finished_ 93

begin
	# Remember where the field started; ctowrd advances ip.
	ip_start = ip
	datatype = 0
	fcode = 's'

	# No word extracted.  Note that width and prec are not assigned on
	# this path; only datatype and fcode have been set.
	word_width = ctowrd (buf, ip, word, maxch)
	if (word_width < 1)
	    return (0)

	# These may be updated later.
	width = word_width
	prec = width

	# Check whether column begins with a quote, indicating that it's
	# a string, even if the word itself is numeric, e.g. "3.14159".
	# Leading tabs are skipped as well as leading blanks, consistent
	# with the first-column test at finished_, which treats both as
	# whitespace.  Otherwise a tab in front of a quoted value would end
	# the scan early and the value could be misclassified as numeric.
	quote = false
	do i = ip_start, ip {
	    if (buf[i] != ' ' && buf[i] != '\t') {
		if (buf[i] == '"' || buf[i] == '\'')
		    quote = true
		break
	    }
	}

	# Get a first estimate of the data type.  We may change this later.
	if (quote) {
	    datatype = TY_CHAR

	} else if (IS_DIGIT(word[1])) {
	    datatype = TY_DOUBLE

	} else if (word[1] == '-' || word[1] == '+' || word[1] == '.') {
	    # NOTE(review): only the second character is examined, so a sign
	    # followed by a decimal point (e.g. -.5) is classified as TY_CHAR.
	    if (IS_DIGIT(word[2]))
		datatype = TY_DOUBLE
	    else
		datatype = TY_CHAR

	} else if (word[1] == 'I') {
	    # Undefined values bypass the numeric scan below; their width,
	    # precision and format code are fixed here.
	    if (streq (word, "INDEFI")) {
		datatype = TY_INT
		width = 6
		fcode = 'd'
		goto finished_
	    } else if (strncmp (word, "INDEF", 5) == 0) {
		# NOTE(review): this is a prefix comparison, so any word
		# that begins with INDEF is taken as an undefined double.
		datatype = TY_DOUBLE
		width = 5
		prec = 5
		fcode = 'g'
		goto finished_
	    } else {
		datatype = TY_CHAR
	    }

	} else {
	    datatype = TY_CHAR
	}

	if (quote) {
	    # The value is enclosed in quotes; don't include them in the width.
	    width = word_width - 2

	    # Trim trailing blanks.  width is unchanged.  If no non-blank
	    # character is found the word is left as it is.
	    do i = strlen (word), 1, -1 {
		if (word[i] != ' ') {
		    word[i+1] = EOS
		    break
		}
	    }
	}

	if (datatype != TY_CHAR) {
	    # So far, the word appears to be a number.  Check each character,
	    # and change the type if we find that it's not numeric.

	    num_colon = 0			# initial values
	    exponent = false
	    dec_point = false
	    prec = 0				# incremented in loop
	    do i = 1, maxch {
		cval = word[i]
		if (IS_DIGIT(cval)) {
		    # Digits in the exponent don't count toward precision.
		    if (!exponent)
			prec = prec + 1		# count it
		} else if (cval == '.') {
		    # There can't be two decimal points, or even one in
		    # an exponent.
		    if (dec_point || exponent) {
			datatype = TY_CHAR
			goto chartype_
		    }
		    dec_point = true
		    # For HH:MM:SS.d or HH:MM.d, restart the count so that
		    # prec is the number of digits after the decimal point.
		    if (num_colon > 0)
			prec = 0
		} else if (cval == '+' || cval == '-') {
		    # A sign must be the first character or in an exponent.
		    if (i > 1 && !exponent) {
			datatype = TY_CHAR
			goto chartype_
		    }
		} else if (cval == ':') {
		    # At most two colons, and none after an exponent.
		    num_colon = num_colon + 1
		    if (exponent || num_colon > 2) {
			datatype = TY_CHAR
			goto chartype_
		    }
		} else if (cval == 'E' || cval == 'e' ||
			   cval == 'D' || cval == 'd') {
		    # There can't be more than one exponent in a number.
		    if (exponent) {
			datatype = TY_CHAR
			goto chartype_
		    }
		    exponent = true		# it looks like an exponent
		} else if (cval == EOS) {
		    break
		} else {
		    datatype = TY_CHAR		# not numeric
		    goto chartype_
		}
	    }
	    prec = max (prec, 1)

	    # We need this test for HMS format because there might have been
	    # no decimal point in the value (e.g. 3:17:42), so the digits
	    # would have been counted in the precision and not reset to zero
	    # by a decimal point.  We could in principle have prec=0, but
	    # that prints incorrect values due to truncation.
	    if (num_colon > 0 && !dec_point)
		prec = 1			# should be zero for HH:MM:SS

	    # Now make sure the field width is sufficient, and set format code.
	    if (num_colon == 2) {		# HH:MM:SS.d
		width = prec + 10
		fcode = 'h'
	    } else if (num_colon == 1) {	# HH:MM.d
		width = prec + 7
		fcode = 'm'
	    } else if (exponent) {
		width = prec + 6
		fcode = 'g'
	    } else if (dec_point) {
		width = prec + 2
		fcode = 'g'
	    } else {			# no decimal point, colon, or exponent
		width = prec + 1
		datatype = TY_INT		# reset datatype to int
		fcode = 'd'
	    }

	}

chartype_
	# Reached either by falling through or by a jump out of the numeric
	# scan above; in the latter case prec holds a partial digit count.
	if (datatype == TY_CHAR && !quote) {
	    # It's a string, but we don't need to check for trailing blanks
	    # because the string is not enclosed in quotes.
	    width = word_width
	}

finished_
	# If this is the first column and the value is left-justified,
	# add a little extra space.
	if (ip_start == 1) {
	    if (buf[1] != ' ' && buf[1] != '\t')
		width = width + LEFT_MARGIN
	}

	return (word_width)
end