diff options
Diffstat (limited to 'pkg/utilities/nttools/tstat/thistogram.x')
-rw-r--r-- | pkg/utilities/nttools/tstat/thistogram.x | 348 |
1 files changed, 348 insertions, 0 deletions
diff --git a/pkg/utilities/nttools/tstat/thistogram.x b/pkg/utilities/nttools/tstat/thistogram.x new file mode 100644 index 00000000..7470e1a5 --- /dev/null +++ b/pkg/utilities/nttools/tstat/thistogram.x @@ -0,0 +1,348 @@ +include <error.h> # defines EA_WARN +include <fset.h> # to check whether input or output is redirected +include <tbset.h> +include "thistogram.h" # defines NPAR, etc. + +define MAX_RANGES (SZ_LINE/2) # max number of ranges of row numbers + +# thistogram -- make a histogram of a table column +# +# Phil Hodge, 2-Dec-1988 Task created. +# Phil Hodge, 12-Jan-1989 th_mk_hist: ignore values that are out of range +# Phil Hodge, 17-Mar-1994 Include parameters dx, clow, chigh. +# Phil Hodge, 3-Oct-1995 Modify to use tbn instead of fnt. +# Phil Hodge, 8-Apr-1999 Call tbfpri. +# Phil Hodge, 8-Jun-1999 Set input/output to STDIN/STDOUT if redirected. + +procedure thistogram() + +pointer inlist, outlist # scr for input & output lists of names +char colname[SZ_COLNAME] # column name +int t_nbins # number of bins +double t_vlow, t_vhigh # lower & upper limits for histogram +double t_dx # bin width +double t_clow, t_chigh # centers of first and last bins +char outcolx[SZ_COLNAME] # column name for indep var for histogram +char outcoly[SZ_COLNAME] # column name for dependent var for histogram +#-- +pointer sp +pointer itp, otp # ptr to table descriptor +pointer cptr # ptr to column descriptor +pointer ocpx, ocpy # ptr to col descr for output columns +pointer intab, outtab # scr for names of input & output tables +pointer range_string # string which gives ranges of row numbers +pointer val, counts # scr for histogram: indep & dep var + +# These six parameters are copied from t_... in each loop. +int nbins +double vlow, vhigh, dx, clow, chigh + +pointer list1, list2 +int i, junk +int nrows # number of rows included and not INDEF +int phu_copied # set by tbfpri and ignored +bool listout # is the output ASCII rather than a table? +bool got[NPAR] # flags to specify what we have got +bool find_datamin # true if we need to find minimum data value +bool find_datamax # true if we need to find maximum data value +pointer tbtopn() +double clgetd() +int fstati() +pointer tbnopen() +int clgeti(), tbnget(), tbnlen() +bool streq() + +begin + # Allocate scratch for lists of names and for table names. + call smark (sp) + call salloc (inlist, SZ_FNAME, TY_CHAR) + call salloc (outlist, SZ_FNAME, TY_CHAR) + call salloc (intab, SZ_FNAME, TY_CHAR) + call salloc (outtab, SZ_FNAME, TY_CHAR) + call salloc (range_string, SZ_FNAME, TY_CHAR) + + # Get task parameters. + + if (fstati (STDIN, F_REDIR) == YES) + call strcpy ("STDIN", Memc[inlist], SZ_FNAME) + else + call clgstr ("intable", Memc[inlist], SZ_FNAME) + + if (fstati (STDOUT, F_REDIR) == YES) + call strcpy ("STDOUT", Memc[outlist], SZ_FNAME) + else + call clgstr ("outtable", Memc[outlist], SZ_FNAME) + + call clgstr ("column", colname, SZ_COLNAME) + + # Some of these six parameters may be INDEF. The "t_" prefix + # means these are task parameters; they are assigned to variables + # (same name but without the "t_") in the loop over tables, and + # those variables may be modified within the loop. + t_nbins = clgeti ("nbins") + t_vlow = clgetd ("lowval") + t_vhigh = clgetd ("highval") + t_dx = clgetd ("dx") + t_clow = clgetd ("clow") + t_chigh = clgetd ("chigh") + + call clgstr ("rows", Memc[range_string], SZ_FNAME) + + listout = streq (Memc[outlist], "STDOUT") # ASCII output? + if ( ! listout ) { + call clgstr ("outcolx", outcolx, SZ_COLNAME) + call clgstr ("outcoly", outcoly, SZ_COLNAME) + } + + if (!IS_INDEF(t_dx)) + if (t_dx <= 0.d0) + call error (1, "dx must not be less than or equal to zero") + + # These parameters are interdependent, so compute what was not + # specified from those that were, as far as we can. + call th_options (t_nbins, t_vlow, t_vhigh, t_dx, t_clow, t_chigh, + got, find_datamin, find_datamax) + + # Expand the input table list. + list1 = tbnopen (Memc[inlist]) + + if ( ! listout ) { + # Expand the output table list. + list2 = tbnopen (Memc[outlist]) + if (tbnlen (list1) != tbnlen (list2)) { + call tbnclose (list1) + call tbnclose (list2) + call error (1, + "Number of input and output tables not the same") + } + } + + # Do for each input table. + while (tbnget (list1, Memc[intab], SZ_FNAME) != EOF) { + + # These may be modified within this loop by th_limits. + nbins = t_nbins + vlow = t_vlow + vhigh = t_vhigh + dx = t_dx + clow = t_clow + chigh = t_chigh + + itp = tbtopn (Memc[intab], READ_ONLY, NULL) + call tbcfnd (itp, colname, cptr, 1) + if (cptr == NULL) { + call tbtclo (itp) + call eprintf ("column not found in %s\n") + call pargstr (Memc[intab]) + if ( ! listout ) # skip next output table + junk = tbnget (list2, Memc[outtab], SZ_FNAME) + next + } + + # Get lower & upper limits for the histogram. + iferr { + call th_limits (itp, cptr, Memc[range_string], + nbins, vlow, vhigh, dx, clow, chigh, + got, find_datamin, find_datamax) + } then { + call erract (EA_WARN) + call eprintf ("Table `%s' will be skipped.\n") + call pargstr (Memc[intab]) + call tbtclo (itp) + next + } + + # Get scratch space for the histogram. + call malloc (val, nbins, TY_DOUBLE) + call malloc (counts, nbins, TY_INT) + + # Make the histogram. + call th_mk_hist (itp, cptr, nbins, vlow, vhigh, dx, + Memc[range_string], Memd[val], Memi[counts], nrows) + + if ( listout ) { + call printf ("# %d rows %s\n") + call pargi (nrows) + call pargstr (Memc[intab]) + do i = 1, nbins { + call printf ("%15.7g %8d\n") + call pargd (Memd[val+i-1]) + call pargi (Memi[counts+i-1]) + } + } else { + + # Create output table & define columns. + junk = tbnget (list2, Memc[outtab], SZ_FNAME) + call tbfpri (Memc[intab], Memc[outtab], phu_copied) + otp = tbtopn (Memc[outtab], NEW_FILE, NULL) + call tbcdef (otp, ocpx, + outcolx, "", "", TY_DOUBLE, 1, 1) + call tbcdef (otp, ocpy, + outcoly, "histogram", "", TY_INT, 1, 1) + call tbtcre (otp) + + # Put info records in the header. + call tbhadt (otp, "intable", Memc[intab]) + call tbhadt (otp, "colname", colname) + call tbhadi (otp, "nrows", nrows) + + # Write the values into the output table, and close it. + call tbcptd (otp, ocpx, Memd[val], 1, nbins) + call tbcpti (otp, ocpy, Memi[counts], 1, nbins) + call tbtclo (otp) + } + call tbtclo (itp) + + call mfree (counts, TY_INT) + call mfree (val, TY_DOUBLE) + } + call tbnclose (list1) + if ( ! listout ) + call tbnclose (list2) + call sfree (sp) +end + +# th_limits -- get limits for histogram +# This routine determines the lower and upper limits of data values for +# making a histogram. If either of the input values v1 or v2 is not INDEF +# (i.e. if it was specified by the user), then that value is returned as +# vlow or vhigh respectively. If either or both are INDEF the minimum and +# maximum values in the table column are gotten, and the minimum and maximum +# are extended a little to ensure that the endpoints are included in the +# histogram. The range is extended by (max - min) / (nbins - 1) / 2 +# on each end. The parameters nbins, vlow, vhigh, and dx may be updated. + +procedure th_limits (tp, cptr, range_str, + nbins, vlow, vhigh, dx, clow, chigh, + got, find_datamin, find_datamax) + +pointer tp # i: ptr to table descriptor +pointer cptr # i: ptr to column descriptor +char range_str[ARB] # i: range of row numbers +int nbins # io: number of bins +double vlow, vhigh # io: lower and upper limits +double dx # io: bin width +double clow, chigh # i: centers of low and high bins +bool got[NPAR] # i: flags to specify what we have got +bool find_datamin # i: true if we need to find minimum data value +bool find_datamax # i: true if we need to find maximum data value +#-- +double value # an element gotten from the table +double vmin, vmax # min & max values in the column +int nrows # number of rows in table +int row # row number +int ranges[3,MAX_RANGES] # ranges of row numbers +int nvalues # returned by decode_ranges and ignored +int stat # returned by get_next_number +bool done +int decode_ranges(), get_next_number(), tbpsta() + +begin + if (find_datamin || find_datamax) { + + # We must determine either the minimum or maximum or both. + + if (decode_ranges (range_str, ranges, MAX_RANGES, nvalues) != OK) + call error (1, "bad range of row numbers") + nrows = tbpsta (tp, TBL_NROWS) + + # First get initial values for min & max in column. We can't just + # take the first value because it might be INDEF. + row = 0 # initialize get_next_number + stat = get_next_number (ranges, row) # get first row number + done = (stat == EOF) || (row > nrows) + while ( ! done ) { + call tbegtd (tp, cptr, row, value) + if ( IS_INDEFD(value) ) { + # get next row number + stat = get_next_number (ranges, row) + if ((stat == EOF) || (row > nrows)) + call error (1, "all values are INDEF") + } else { + vmin = value + vmax = value + done = true + } + } + + # Update min & max values. + stat = get_next_number (ranges, row) # get next row number + done = (stat == EOF) || (row > nrows) + while ( ! done ) { + call tbegtd (tp, cptr, row, value) + if ( !IS_INDEFD(value) ) { + if (value < vmin) + vmin = value + if (value > vmax) + vmax = value + } + stat = get_next_number (ranges, row) # get next row number + if ((stat == EOF) || (row > nrows)) + done = true + } + + # Update parameter values. + call th_update (vmin, vmax, nbins, vlow, vhigh, dx, clow, chigh, + got, find_datamin, find_datamax) + } +end + +# th_mk_hist -- make the histogram + +procedure th_mk_hist (tp, cptr, nbins, vlow, vhigh, dx, range_str, + val, counts, nrows) + +pointer tp # i: ptr to table descriptor +pointer cptr # i: ptr to column descriptor +int nbins # i: number of bins +double vlow # i: min value for histogram +double vhigh # i: max value for histogram +double dx # i: bin width +char range_str[ARB] # i: range of row numbers +double val[nbins] # o: array of values at center of bins +int counts[nbins] # o: the histogram, array of counts within bins +int nrows # o: number of rows in range_str and value within limits +#-- +double value # an element gotten from the table +int bin # bin number for output +int totalrows # total number of rows in table +int row # row number +int i +int ranges[3,MAX_RANGES] # ranges of row numbers +int nvalues # returned by decode_ranges and ignored +int stat # returned by get_next_number +bool done +int decode_ranges(), get_next_number(), tbpsta() + +begin + totalrows = tbpsta (tp, TBL_NROWS) + + # Initialize the range of row numbers. + if (decode_ranges (range_str, ranges, MAX_RANGES, nvalues) != OK) + call error (1, "bad range of row numbers") + + do i = 1, nbins { + val[i] = vlow + (i - 0.5d0) * dx # value at center of bin + counts[i] = 0 # initialize histogram + } + + nrows = 0 # initialize counter + row = 0 # initialize get_next_number + stat = get_next_number (ranges, row) # get first row number + done = (stat == EOF) || (row > totalrows) + while ( ! done ) { + call tbegtd (tp, cptr, row, value) + if ( ! IS_INDEF(value) ) { + if (value >= vlow && value < vhigh) { + if (dx > 0.d0) + bin = int ((value - vlow) / dx) + 1 + else + bin = 1 + counts[bin] = counts[bin] + 1 + nrows = nrows + 1 + } + } + stat = get_next_number (ranges, row) # get next row number + done = (stat == EOF) || (row > totalrows) + } +end |