1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
|
include <error.h>
include <fset.h> # to check whether input or output is redirected
include <tbset.h>
define MAX_RANGES (SZ_LINE/2) # max number of ranges of row numbers
define NUM_COL 8 # number of output columns
# tstat -- get statistics for a table column
# This task gets the mean, standard deviation, median, and minimum &
# maximum values for a table column.
#
# Phil Hodge, 8-Dec-1988 Task created.
# Phil Hodge, 14-Mar-1989 Also compute the median; fix bug in std dev.
# Phil Hodge, 27-May-1992 Print INDEF if no values in range.
# Phil Hodge, 31-Jul-1992 Print column name after table name.
# Phil Hodge, 11-Jan-1992 Use asrtd instead of asokd for median if nr is even.
# Phil Hodge, 3-Oct-1995 Modify to use tbn instead of fnt.
# Phil Hodge, 25-Apr-1997 Use asrtd instead of asokd regardless of nr.
# Phil Hodge, 26-Mar-1998 Get all elements of array columns; ignore the
# column parameter if an input table has only one column.
# Phil Hodge, 8-Jun-1999 Set input/output to STDIN/STDOUT if redirected.
# Phil Hodge, 2-Jan-2001 If the input was redirected, and one command-line
# argument was specified, take that argument to be the
# column name rather than the table name.
procedure tstat()
pointer inlist # scratch for list of input table names
char cl_colname[SZ_COLNAME] # column name gotten from cl parameter
double lowlim # lower & upper limits for histogram
double highlim # lower & upper limits for histogram
pointer range_string # string which gives ranges of row numbers
char nam_table[SZ_FNAME] # column name for table name
char nam_name[SZ_COLNAME] # column name for column name
char nam_mean[SZ_COLNAME] # column name for mean
char nam_stddev[SZ_COLNAME] # column name for standard deviation
char nam_med[SZ_COLNAME] # column name for median
char nam_min[SZ_COLNAME] # column name for minimum
char nam_max[SZ_COLNAME] # column name for maximum
char nam_nrows[SZ_COLNAME] # column name for number of good rows
#--
pointer list1 # for list of input tables
pointer sp
pointer itp, otp # ptr to table descriptor
pointer cptr # ptr to column descriptor
pointer ocp[NUM_COL] # ptrs to col descriptors for output columns
pointer intab, outtab # scr for names of input & output tables
char colname[SZ_COLNAME] # column name
double vmean # mean value
double vstddev # standard deviation of values
double vmedian # median value
double vmin, vmax # minimum & maximum values
int row # output row number
int nrows # number of rows included and not INDEF
int ntables # number of tables in input list
bool listout # ASCII output?
bool tabout # table output?
bool new_table # true if output table does not already exist
int nargs # number of command-line arguments
bool in_redir # is input redirected?
int clgeti()
double clgetd()
int fstati()
pointer tbnopen()
int tbnget(), tbnlen()
pointer tbtopn()
pointer tbcnum()
int tbtacc(), tbpsta()
bool streq()
begin
# Allocate scratch for lists of names and for table names.
call smark (sp)
call salloc (inlist, SZ_LINE, TY_CHAR)
call salloc (intab, SZ_LINE, TY_CHAR)
call salloc (outtab, SZ_LINE, TY_CHAR)
call salloc (range_string, SZ_LINE, TY_CHAR)
# Get task parameters.
nargs = clgeti ("$nargs")
in_redir = fstati (STDIN, F_REDIR) == YES
if (in_redir)
call strcpy ("STDIN", Memc[inlist], SZ_LINE)
else
call clgstr ("intable", Memc[inlist], SZ_LINE)
if (fstati (STDOUT, F_REDIR) == YES)
call strcpy ("STDOUT", Memc[outtab], SZ_LINE)
else
call clgstr ("outtable", Memc[outtab], SZ_LINE)
cl_colname[1] = EOS # initial value
lowlim = clgetd ("lowlim") # these limits may be INDEF
highlim = clgetd ("highlim")
call clgstr ("rows", Memc[range_string], SZ_LINE)
# ASCII output? table output? create a new output table?
listout = streq (Memc[outtab], "STDOUT")
if ( listout || (Memc[outtab] == EOS) ) {
tabout = false
} else {
tabout = true
new_table = (tbtacc (Memc[outtab]) == NO)
}
if (tabout) {
call clgstr ("n_tab", nam_table, SZ_FNAME)
call clgstr ("n_nam", nam_name, SZ_COLNAME)
call clgstr ("n_nrows", nam_nrows, SZ_COLNAME)
call clgstr ("n_mean", nam_mean, SZ_COLNAME)
call clgstr ("n_stddev", nam_stddev, SZ_COLNAME)
call clgstr ("n_median", nam_med, SZ_COLNAME)
call clgstr ("n_min", nam_min, SZ_COLNAME)
call clgstr ("n_max", nam_max, SZ_COLNAME)
}
# Expand the input table list.
list1 = tbnopen (Memc[inlist])
ntables = tbnlen (list1)
if (listout) {
row = 0 # just to have a definite value
if (ntables > 1) {
call printf ("# nrows")
call printf (" mean stddev median")
call printf (" min max\n\n")
}
} else if (tabout) {
# Create output table (or open existing table) & define columns.
if (new_table)
otp = tbtopn (Memc[outtab], NEW_FILE, NULL)
else
otp = tbtopn (Memc[outtab], READ_WRITE, NULL)
call tbcfnd (otp, nam_table, ocp[1], 1)
call tbcfnd (otp, nam_name, ocp[2], 1)
call tbcfnd (otp, nam_nrows, ocp[3], 1)
call tbcfnd (otp, nam_mean, ocp[4], 1)
call tbcfnd (otp, nam_stddev, ocp[5], 1)
call tbcfnd (otp, nam_med, ocp[6], 1)
call tbcfnd (otp, nam_min, ocp[7], 1)
call tbcfnd (otp, nam_max, ocp[8], 1)
if (ocp[1] == NULL)
call tbcdef (otp, ocp[1],
nam_table, "", "", -SZ_FNAME, 1, 1)
if (ocp[2] == NULL)
call tbcdef (otp, ocp[2],
nam_name, "", "", -SZ_COLNAME, 1, 1)
if (ocp[3] == NULL)
call tbcdef (otp, ocp[3],
nam_nrows, "", "", TY_INT, 1, 1)
if (ocp[4] == NULL)
call tbcdef (otp, ocp[4],
nam_mean, "", "", TY_DOUBLE, 1, 1)
if (ocp[5] == NULL)
call tbcdef (otp, ocp[5],
nam_stddev, "", "", TY_DOUBLE, 1, 1)
if (ocp[6] == NULL)
call tbcdef (otp, ocp[6],
nam_med, "", "", TY_DOUBLE, 1, 1)
if (ocp[7] == NULL)
call tbcdef (otp, ocp[7],
nam_min, "", "", TY_DOUBLE, 1, 1)
if (ocp[8] == NULL)
call tbcdef (otp, ocp[8],
nam_max, "", "", TY_DOUBLE, 1, 1)
if (new_table)
call tbtcre (otp)
row = tbpsta (otp, TBL_NROWS) # append to whatever is there
}
# Do for each input table.
while (tbnget (list1, Memc[intab], SZ_LINE) != EOF) {
itp = tbtopn (Memc[intab], READ_ONLY, NULL)
if (tbpsta (itp, TBL_NCOLS) == 1) {
cptr = tbcnum (itp, 1)
call tbcigt (cptr, TBL_COL_NAME, colname, SZ_COLNAME)
} else {
if (cl_colname[1] == EOS) {
if (nargs == 1 && in_redir) {
# in this case, the one argument should be column name
call clgstr ("intable", cl_colname, SZ_COLNAME)
# update par file
call clpstr ("intable", "STDIN")
call clpstr ("column", cl_colname)
} else {
call clgstr ("column", cl_colname, SZ_COLNAME)
}
}
call strcpy (cl_colname, colname, SZ_COLNAME)
call tbcfnd (itp, colname, cptr, 1)
}
if (cptr == NULL) {
call tbtclo (itp)
call eprintf ("column %s not found in %s\n")
call pargstr (colname)
call pargstr (Memc[intab])
next
}
row = row + 1
if (listout) {
call printf ("# %s %s\n")
call pargstr (Memc[intab])
call pargstr (colname)
if (ntables == 1) {
call printf ("# nrows")
call printf (" mean stddev median")
call printf (" min max\n")
}
}
# Get statistics.
iferr {
call ts_values (itp, cptr, Memc[range_string], lowlim, highlim,
vmean, vstddev, vmedian, vmin, vmax, nrows)
} then {
call erract (EA_WARN)
next
}
if (listout) {
call printf ("%5d %17.10g %13.6g %13.6g %13.6g %13.6g\n")
call pargi (nrows)
call pargd (vmean)
call pargd (vstddev)
call pargd (vmedian)
call pargd (vmin)
call pargd (vmax)
} else if (tabout) {
# Write the values into the output table.
call tbeptt (otp, ocp[1], row, Memc[intab])
call tbeptt (otp, ocp[2], row, colname)
call tbepti (otp, ocp[3], row, nrows)
call tbeptd (otp, ocp[4], row, vmean)
call tbeptd (otp, ocp[5], row, vstddev)
call tbeptd (otp, ocp[6], row, vmedian)
call tbeptd (otp, ocp[7], row, vmin)
call tbeptd (otp, ocp[8], row, vmax)
}
call tbtclo (itp) # close current input table
}
# Close the output table. It may be that nothing was written to it.
if (tabout)
call tbtclo (otp)
# Save the results (from the last input table) in cl parameters.
call clputi ("nrows", nrows)
call clputd ("mean", vmean)
call clputd ("stddev", vstddev)
call clputd ("median", vmedian)
call clputd ("vmin", vmin)
call clputd ("vmax", vmax)
call tbnclose (list1)
call sfree (sp)
end
# ts_values -- get statistics for a table column
# This routine gets the mean, standard deviation, minimum and maximum
# values for a table column. If lower and/or upper cutoff limits were
# specified (i.e. are not INDEF) then values outside that range will not
# be included in the statistics. INDEF values are also not included.
procedure ts_values (tp, cptr, range_str, lowlim, highlim,
vmean, vstddev, vmedian, vmin, vmax, nrows)
pointer tp # i: ptr to table descriptor
pointer cptr # i: ptr to column descriptor
char range_str[ARB] # i: range of row numbers
double lowlim # i: lower cutoff of values to be included
double highlim # i: upper cutoff of values to be included
double vmean # o: mean value
double vstddev # o: standard deviation of values
double vmedian # o: median value
double vmin # o: minimum value
double vmax # o: maximum value
int nrows # o: number of rows included in the statistics
#--
pointer sp
pointer val # scratch for array of values (for median)
pointer descrip # column selector descriptor
double value # an element gotten from the table
double sum, sumsq # for accumulating sums
double smin, smax # temp min & max
double diff # current value minus first good value
int all_elem # total number of elements in column
int selrows # number of selected rows
int nelem # number of elements in one cell
int nret # number returned (ignored)
int nr # current number of rows
int row # row number
int ranges[3,MAX_RANGES] # ranges of row numbers
int nvalues # returned by decode_ranges and ignored
int stat # returned by get_next_number
int i, j # loop indexes
bool chklow, chkhi # were low (high) limits specified?
bool val_ok # is current value within limits?
bool done # loop-termination flag
int decode_ranges(), get_next_number()
int tbagtd()
errchk tbagtd, tbegtd, tcs_rdaryd
begin
if (decode_ranges (range_str, ranges, MAX_RANGES, nvalues) != OK) {
call eprintf ("rows = `%s'\n")
call pargstr (range_str)
call error (1, "Range of row numbers is invalid.")
}
# Get the number of elements per table cell and the number of
# rows selected using row-selector syntax (as opposed to using
# the 'rows' task parameter).
call tbcnel1 (tp, cptr, descrip, nelem, selrows)
# Find out how many rows there are in the table, restricted by
# either a row selector or the 'rows' task parameter, or both.
if (range_str[1] == '-' || range_str[1] == EOS) {
all_elem = selrows * nelem
} else {
# Count the number of rows specified.
i = 0 # count of row numbers
row = 0 # initialize get_next_number
done = false
while (!done) {
stat = get_next_number (ranges, row)
if (stat == EOF || row > selrows)
done = true
else
i = i + 1
}
all_elem = i * nelem
}
# Allocate scratch space to hold an entire column.
call smark (sp)
call salloc (val, all_elem, TY_DOUBLE)
row = 0 # reinitialize get_next_number
stat = get_next_number (ranges, row)
done = (stat == EOF || row > selrows)
# Get the data.
i = 1
while (!done) {
if (descrip == NULL) {
if (nelem == 1)
call tbegtd (tp, cptr, row, Memd[val+i-1])
else
nret = tbagtd (tp, cptr, row, Memd[val+i-1], 1, nelem)
} else {
call tcs_rdaryd (tp, descrip, row, all_elem-i+1,
nret, Memd[val+i-1])
}
i = i + nelem
stat = get_next_number (ranges, row)
done = (stat == EOF || row > selrows)
}
if (all_elem != i - 1)
call error (1, "not all elements read from column")
chklow = ! IS_INDEFD(lowlim)
chkhi = ! IS_INDEFD(highlim)
# Check which values to include.
sum = 0.d0
nr = 0
do i = 0, all_elem-1 { # zero indexed
value = Memd[val+i]
if (!IS_INDEFD(value)) {
val_ok = true # an initial value
if (chkhi && (value > highlim)) {
val_ok = false
Memd[val+i] = INDEFD
}
if (chklow && (value < lowlim)) {
val_ok = false
Memd[val+i] = INDEFD
}
if (val_ok) {
nr = nr + 1
sum = sum + value
}
}
}
if (nr < 1) {
# No rows with valid data, so set the output to INDEF.
nrows = 0
vmean = INDEFD
vstddev = INDEFD
vmedian = INDEFD
vmin = INDEFD
vmax = INDEFD
return
} else {
# Assign two of the output values.
nrows = nr
vmean = sum / double(nr)
}
# Move good data to the beginning of the array.
if (nr < all_elem) {
j = 0
do i = 0, all_elem-1 { # zero indexed
if (!IS_INDEFD(Memd[val+i])) {
Memd[val+j] = Memd[val+i]
j = j + 1
}
}
}
# Find the min, max, standard deviation of the good values.
sumsq = 0.d0
smin = Memd[val]
smax = Memd[val]
do i = 0, nr-1 { # zero indexed
value = Memd[val+i]
diff = value - vmean
sumsq = sumsq + diff * diff
if (value < smin)
smin = value
if (value > smax)
smax = value
}
# Assign output values.
vmin = smin
vmax = smax
if (nr > 1)
vstddev = sqrt (sumsq / double(nr-1))
else
vstddev = INDEFD
# Determine the median. If there are an even number of values, the
# middle two are averaged.
if (nr < 3) {
vmedian = vmean
} else {
call asrtd (Memd[val], Memd[val], nr)
if (nr / 2 * 2 == nr) { # even number of values?
vmedian = (Memd[val+nr/2-1] + Memd[val+nr/2]) / 2.d0
} else {
vmedian = Memd[val+nr/2]
}
}
call sfree (sp)
end
|