aboutsummaryrefslogtreecommitdiff
path: root/pkg/utilities/nttools/tstat/thistogram.x
blob: 7470e1a55785155e382280c211a58da42427bff4 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
include <error.h>		# defines EA_WARN
include <fset.h>		# to check whether input or output is redirected
include <tbset.h>
include "thistogram.h"		# defines NPAR, etc.

define	MAX_RANGES (SZ_LINE/2)	# max number of ranges of row numbers

# thistogram -- make a histogram of a table column
#
# Phil Hodge,  2-Dec-1988  Task created.
# Phil Hodge, 12-Jan-1989  th_mk_hist:  ignore values that are out of range
# Phil Hodge, 17-Mar-1994  Include parameters dx, clow, chigh.
# Phil Hodge,  3-Oct-1995  Modify to use tbn instead of fnt.
# Phil Hodge,  8-Apr-1999  Call tbfpri.
# Phil Hodge,  8-Jun-1999  Set input/output to STDIN/STDOUT if redirected.

procedure thistogram()

pointer inlist, outlist		# scr for input & output lists of names
char	colname[SZ_COLNAME]	# column name
int	t_nbins			# number of bins
double	t_vlow, t_vhigh		# lower & upper limits for histogram
double	t_dx			# bin width
double	t_clow, t_chigh		# centers of first and last bins
char	outcolx[SZ_COLNAME]	# column name for indep var for histogram
char	outcoly[SZ_COLNAME]	# column name for dependent var for histogram
#--
pointer sp
pointer itp, otp		# ptr to table descriptor
pointer cptr			# ptr to column descriptor
pointer ocpx, ocpy		# ptr to col descr for output columns
pointer intab, outtab		# scr for names of input & output tables
pointer range_string		# string which gives ranges of row numbers
pointer val, counts		# scr for histogram:  indep & dep var

# These six parameters are copied from t_... in each loop.
int	nbins
double	vlow, vhigh, dx, clow, chigh

pointer list1, list2
int	i, junk
int	nrows			# number of rows included and not INDEF
int	phu_copied		# set by tbfpri and ignored
bool	listout			# is the output ASCII rather than a table?
bool	got[NPAR]		# flags to specify what we have got
bool	find_datamin		# true if we need to find minimum data value
bool	find_datamax		# true if we need to find maximum data value
pointer tbtopn()
double	clgetd()
int	fstati()
pointer tbnopen()
int	clgeti(), tbnget(), tbnlen()
bool	streq()

begin
	# Allocate scratch for lists of names and for table names.
	call smark (sp)
	call salloc (inlist, SZ_FNAME, TY_CHAR)
	call salloc (outlist, SZ_FNAME, TY_CHAR)
	call salloc (intab, SZ_FNAME, TY_CHAR)
	call salloc (outtab, SZ_FNAME, TY_CHAR)
	call salloc (range_string, SZ_FNAME, TY_CHAR)

	# Get task parameters.

	if (fstati (STDIN, F_REDIR) == YES)
	    call strcpy ("STDIN", Memc[inlist], SZ_FNAME)
	else
	    call clgstr ("intable", Memc[inlist], SZ_FNAME)

	if (fstati (STDOUT, F_REDIR) == YES)
	    call strcpy ("STDOUT", Memc[outlist], SZ_FNAME)
	else
	    call clgstr ("outtable", Memc[outlist], SZ_FNAME)

	call clgstr ("column", colname, SZ_COLNAME)

	# Some of these six parameters may be INDEF.  The "t_" prefix
	# means these are task parameters; they are assigned to variables
	# (same name but without the "t_") in the loop over tables, and
	# those variables may be modified within the loop.
	t_nbins = clgeti ("nbins")
	t_vlow = clgetd ("lowval")
	t_vhigh = clgetd ("highval")
	t_dx = clgetd ("dx")
	t_clow = clgetd ("clow")
	t_chigh = clgetd ("chigh")

	call clgstr ("rows", Memc[range_string], SZ_FNAME)

	listout = streq (Memc[outlist], "STDOUT")	# ASCII output?
	if ( ! listout ) {
	    call clgstr ("outcolx", outcolx, SZ_COLNAME)
	    call clgstr ("outcoly", outcoly, SZ_COLNAME)
	}

	if (!IS_INDEF(t_dx))
	    if (t_dx <= 0.d0)
		call error (1, "dx must not be less than or equal to zero")

	# These parameters are interdependent, so compute what was not
	# specified from those that were, as far as we can.
	call th_options (t_nbins, t_vlow, t_vhigh, t_dx, t_clow, t_chigh,
		got, find_datamin, find_datamax)

	# Expand the input table list.
	list1 = tbnopen (Memc[inlist])

	if ( ! listout ) {
	    # Expand the output table list.
	    list2 = tbnopen (Memc[outlist])
	    if (tbnlen (list1) != tbnlen (list2)) {
		call tbnclose (list1)
		call tbnclose (list2)
		call error (1,
			"Number of input and output tables not the same")
	    }
	}

	# Do for each input table.
	while (tbnget (list1, Memc[intab], SZ_FNAME) != EOF) {

	    # These may be modified within this loop by th_limits.
	    nbins = t_nbins
	    vlow = t_vlow
	    vhigh = t_vhigh
	    dx = t_dx
	    clow = t_clow
	    chigh = t_chigh

	    itp = tbtopn (Memc[intab], READ_ONLY, NULL)
	    call tbcfnd (itp, colname, cptr, 1)
	    if (cptr == NULL) {
		call tbtclo (itp)
		call eprintf ("column not found in %s\n")
		    call pargstr (Memc[intab])
		if ( ! listout )	# skip next output table
		    junk = tbnget (list2, Memc[outtab], SZ_FNAME)
		next
	    }

	    # Get lower & upper limits for the histogram.
	    iferr {
		call th_limits (itp, cptr, Memc[range_string],
			nbins, vlow, vhigh, dx, clow, chigh,
			got, find_datamin, find_datamax)
	    } then {
		call erract (EA_WARN)
		call eprintf ("Table `%s' will be skipped.\n")
		    call pargstr (Memc[intab])
		call tbtclo (itp)
		next
	    }

	    # Get scratch space for the histogram.
	    call malloc (val, nbins, TY_DOUBLE)
	    call malloc (counts, nbins, TY_INT)

	    # Make the histogram.
	    call th_mk_hist (itp, cptr, nbins, vlow, vhigh, dx,
			Memc[range_string], Memd[val], Memi[counts], nrows)

	    if ( listout ) {
		call printf ("# %d rows %s\n")
		    call pargi (nrows)
		    call pargstr (Memc[intab])
		do i = 1, nbins {
		    call printf ("%15.7g %8d\n")
			call pargd (Memd[val+i-1])
			call pargi (Memi[counts+i-1])
		}
	    } else {

		# Create output table & define columns.
		junk = tbnget (list2, Memc[outtab], SZ_FNAME)
		call tbfpri (Memc[intab], Memc[outtab], phu_copied)
		otp = tbtopn (Memc[outtab], NEW_FILE, NULL)
		call tbcdef (otp, ocpx,
			outcolx, "", "", TY_DOUBLE, 1, 1)
		call tbcdef (otp, ocpy,
			outcoly, "histogram", "", TY_INT, 1, 1)
		call tbtcre (otp)

		# Put info records in the header.
		call tbhadt (otp, "intable", Memc[intab])
		call tbhadt (otp, "colname", colname)
		call tbhadi (otp, "nrows", nrows)

		# Write the values into the output table, and close it.
		call tbcptd (otp, ocpx, Memd[val], 1, nbins)
		call tbcpti (otp, ocpy, Memi[counts], 1, nbins)
		call tbtclo (otp)
	    }
	    call tbtclo (itp)

	    call mfree (counts, TY_INT)
	    call mfree (val, TY_DOUBLE)
	}
	call tbnclose (list1)
	if ( ! listout )
	    call tbnclose (list2)
	call sfree (sp)
end

# th_limits -- get limits for histogram
# This routine determines the lower and upper limits of data values for
# making a histogram.  If either of the input values v1 or v2 is not INDEF
# (i.e. if it was specified by the user), then that value is returned as
# vlow or vhigh respectively.  If either or both are INDEF the minimum and
# maximum values in the table column are gotten, and the minimum and maximum
# are extended a little to ensure that the endpoints are included in the
# histogram.  The range is extended by (max - min) / (nbins - 1) / 2
# on each end.  The parameters nbins, vlow, vhigh, and dx may be updated.

procedure th_limits (tp, cptr, range_str,
		nbins, vlow, vhigh, dx, clow, chigh,
		got, find_datamin, find_datamax)

pointer tp		# i: ptr to table descriptor
pointer cptr		# i: ptr to column descriptor
char	range_str[ARB]	# i: range of row numbers
int	nbins		# io: number of bins
double	vlow, vhigh	# io: lower and upper limits
double	dx		# io: bin width
double	clow, chigh	# i: centers of low and high bins
bool	got[NPAR]	# i: flags to specify what we have got
bool	find_datamin	# i: true if we need to find minimum data value
bool	find_datamax	# i: true if we need to find maximum data value
#--
double	value		# an element gotten from the table
double	vmin, vmax	# min & max values in the column
int	nrows		# number of rows in table
int	row		# row number
int	ranges[3,MAX_RANGES]	# ranges of row numbers
int	nvalues			# returned by decode_ranges and ignored
int	stat			# returned by get_next_number
bool	done
int	decode_ranges(), get_next_number(), tbpsta()

begin
	if (find_datamin || find_datamax) {

	    # We must determine either the minimum or maximum or both.

	    if (decode_ranges (range_str, ranges, MAX_RANGES, nvalues) != OK)
		call error (1, "bad range of row numbers")
	    nrows = tbpsta (tp, TBL_NROWS)

	    # First get initial values for min & max in column.  We can't just
	    # take the first value because it might be INDEF.
	    row = 0				# initialize get_next_number
	    stat = get_next_number (ranges, row)	# get first row number
	    done = (stat == EOF) || (row > nrows)
	    while ( ! done ) {
		call tbegtd (tp, cptr, row, value)
		if ( IS_INDEFD(value) ) {
		    # get next row number
		    stat = get_next_number (ranges, row)
		    if ((stat == EOF) || (row > nrows))
			call error (1, "all values are INDEF")
		} else {
		    vmin = value
		    vmax = value
		    done = true
		}
	    }

	    # Update min & max values.
	    stat = get_next_number (ranges, row)	# get next row number
	    done = (stat == EOF) || (row > nrows)
	    while ( ! done ) {
		call tbegtd (tp, cptr, row, value)
		if ( !IS_INDEFD(value) ) {
		    if (value < vmin)
			vmin = value
		    if (value > vmax)
			vmax = value
		}
		stat = get_next_number (ranges, row)	# get next row number
		if ((stat == EOF) || (row > nrows))
		    done = true
	    }

	    # Update parameter values.
	    call th_update (vmin, vmax, nbins, vlow, vhigh, dx, clow, chigh,
			got, find_datamin, find_datamax)
	}
end

# th_mk_hist -- make the histogram

procedure th_mk_hist (tp, cptr, nbins, vlow, vhigh, dx, range_str,
		val, counts, nrows)

pointer tp		# i: ptr to table descriptor
pointer cptr		# i: ptr to column descriptor
int	nbins		# i: number of bins
double	vlow		# i: min value for histogram
double	vhigh		# i: max value for histogram
double	dx		# i: bin width
char	range_str[ARB]	# i: range of row numbers
double	val[nbins]	# o: array of values at center of bins
int	counts[nbins]	# o: the histogram, array of counts within bins
int	nrows		# o: number of rows in range_str and value within limits
#--
double	value		# an element gotten from the table
int	bin		# bin number for output
int	totalrows	# total number of rows in table
int	row		# row number
int	i
int	ranges[3,MAX_RANGES]	# ranges of row numbers
int	nvalues			# returned by decode_ranges and ignored
int	stat			# returned by get_next_number
bool	done
int	decode_ranges(), get_next_number(), tbpsta()

begin
	totalrows = tbpsta (tp, TBL_NROWS)

	# Initialize the range of row numbers.
	if (decode_ranges (range_str, ranges, MAX_RANGES, nvalues) != OK)
	    call error (1, "bad range of row numbers")

	do i = 1, nbins {
	    val[i] = vlow + (i - 0.5d0) * dx	# value at center of bin
	    counts[i] = 0			# initialize histogram
	}

	nrows = 0				# initialize counter
	row = 0					# initialize get_next_number
	stat = get_next_number (ranges, row)	# get first row number
	done = (stat == EOF) || (row > totalrows)
	while ( ! done ) {
	    call tbegtd (tp, cptr, row, value)
	    if ( ! IS_INDEF(value) ) {
		if (value >= vlow && value < vhigh) {
		    if (dx > 0.d0)
			bin = int ((value - vlow) / dx) + 1
		    else
			bin = 1
		    counts[bin] = counts[bin] + 1
		    nrows = nrows + 1
		}
	    }
	    stat = get_next_number (ranges, row)	# get next row number
	    done = (stat == EOF) || (row > totalrows)
	}
end