diff options
Diffstat (limited to 'pkg/utilities/nttools/tstat')
-rw-r--r-- | pkg/utilities/nttools/tstat/mkpkg | 13 | ||||
-rw-r--r-- | pkg/utilities/nttools/tstat/thistogram.h | 8 | ||||
-rw-r--r-- | pkg/utilities/nttools/tstat/thistogram.x | 348 | ||||
-rw-r--r-- | pkg/utilities/nttools/tstat/thoptions.x | 343 | ||||
-rw-r--r-- | pkg/utilities/nttools/tstat/tstat.x | 465 |
5 files changed, 1177 insertions, 0 deletions
diff --git a/pkg/utilities/nttools/tstat/mkpkg b/pkg/utilities/nttools/tstat/mkpkg new file mode 100644 index 00000000..f708c769 --- /dev/null +++ b/pkg/utilities/nttools/tstat/mkpkg @@ -0,0 +1,13 @@ +# Update the thistogram application code in the ttools package library +# Author: Phil Hodge, 2-DEC-1988 + +$checkout libpkg.a ../ +$update libpkg.a +$checkin libpkg.a ../ +$exit + +libpkg.a: + thistogram.x <error.h> <tbset.h> "thistogram.h" + thoptions.x "thistogram.h" + tstat.x <tbset.h> + ; diff --git a/pkg/utilities/nttools/tstat/thistogram.h b/pkg/utilities/nttools/tstat/thistogram.h new file mode 100644 index 00000000..5c2c0910 --- /dev/null +++ b/pkg/utilities/nttools/tstat/thistogram.h @@ -0,0 +1,8 @@ +# thistogram.h +define NPAR 6 # there are six parameters altogether +define NBINS 1 # got[NBINS] = true means we have a value for nbins +define VLOW 2 +define VHIGH 3 +define DX 4 +define CLOW 5 +define CHIGH 6 diff --git a/pkg/utilities/nttools/tstat/thistogram.x b/pkg/utilities/nttools/tstat/thistogram.x new file mode 100644 index 00000000..7470e1a5 --- /dev/null +++ b/pkg/utilities/nttools/tstat/thistogram.x @@ -0,0 +1,348 @@ +include <error.h> # defines EA_WARN +include <fset.h> # to check whether input or output is redirected +include <tbset.h> +include "thistogram.h" # defines NPAR, etc. + +define MAX_RANGES (SZ_LINE/2) # max number of ranges of row numbers + +# thistogram -- make a histogram of a table column +# +# Phil Hodge, 2-Dec-1988 Task created. +# Phil Hodge, 12-Jan-1989 th_mk_hist: ignore values that are out of range +# Phil Hodge, 17-Mar-1994 Include parameters dx, clow, chigh. +# Phil Hodge, 3-Oct-1995 Modify to use tbn instead of fnt. +# Phil Hodge, 8-Apr-1999 Call tbfpri. +# Phil Hodge, 8-Jun-1999 Set input/output to STDIN/STDOUT if redirected. + +procedure thistogram() + +pointer inlist, outlist # scr for input & output lists of names +char colname[SZ_COLNAME] # column name +int t_nbins # number of bins +double t_vlow, t_vhigh # lower & upper limits for histogram +double t_dx # bin width +double t_clow, t_chigh # centers of first and last bins +char outcolx[SZ_COLNAME] # column name for indep var for histogram +char outcoly[SZ_COLNAME] # column name for dependent var for histogram +#-- +pointer sp +pointer itp, otp # ptr to table descriptor +pointer cptr # ptr to column descriptor +pointer ocpx, ocpy # ptr to col descr for output columns +pointer intab, outtab # scr for names of input & output tables +pointer range_string # string which gives ranges of row numbers +pointer val, counts # scr for histogram: indep & dep var + +# These six parameters are copied from t_... in each loop. +int nbins +double vlow, vhigh, dx, clow, chigh + +pointer list1, list2 +int i, junk +int nrows # number of rows included and not INDEF +int phu_copied # set by tbfpri and ignored +bool listout # is the output ASCII rather than a table? +bool got[NPAR] # flags to specify what we have got +bool find_datamin # true if we need to find minimum data value +bool find_datamax # true if we need to find maximum data value +pointer tbtopn() +double clgetd() +int fstati() +pointer tbnopen() +int clgeti(), tbnget(), tbnlen() +bool streq() + +begin + # Allocate scratch for lists of names and for table names. + call smark (sp) + call salloc (inlist, SZ_FNAME, TY_CHAR) + call salloc (outlist, SZ_FNAME, TY_CHAR) + call salloc (intab, SZ_FNAME, TY_CHAR) + call salloc (outtab, SZ_FNAME, TY_CHAR) + call salloc (range_string, SZ_FNAME, TY_CHAR) + + # Get task parameters. + + if (fstati (STDIN, F_REDIR) == YES) + call strcpy ("STDIN", Memc[inlist], SZ_FNAME) + else + call clgstr ("intable", Memc[inlist], SZ_FNAME) + + if (fstati (STDOUT, F_REDIR) == YES) + call strcpy ("STDOUT", Memc[outlist], SZ_FNAME) + else + call clgstr ("outtable", Memc[outlist], SZ_FNAME) + + call clgstr ("column", colname, SZ_COLNAME) + + # Some of these six parameters may be INDEF. The "t_" prefix + # means these are task parameters; they are assigned to variables + # (same name but without the "t_") in the loop over tables, and + # those variables may be modified within the loop. + t_nbins = clgeti ("nbins") + t_vlow = clgetd ("lowval") + t_vhigh = clgetd ("highval") + t_dx = clgetd ("dx") + t_clow = clgetd ("clow") + t_chigh = clgetd ("chigh") + + call clgstr ("rows", Memc[range_string], SZ_FNAME) + + listout = streq (Memc[outlist], "STDOUT") # ASCII output? + if ( ! listout ) { + call clgstr ("outcolx", outcolx, SZ_COLNAME) + call clgstr ("outcoly", outcoly, SZ_COLNAME) + } + + if (!IS_INDEF(t_dx)) + if (t_dx <= 0.d0) + call error (1, "dx must not be less than or equal to zero") + + # These parameters are interdependent, so compute what was not + # specified from those that were, as far as we can. + call th_options (t_nbins, t_vlow, t_vhigh, t_dx, t_clow, t_chigh, + got, find_datamin, find_datamax) + + # Expand the input table list. + list1 = tbnopen (Memc[inlist]) + + if ( ! listout ) { + # Expand the output table list. + list2 = tbnopen (Memc[outlist]) + if (tbnlen (list1) != tbnlen (list2)) { + call tbnclose (list1) + call tbnclose (list2) + call error (1, + "Number of input and output tables not the same") + } + } + + # Do for each input table. + while (tbnget (list1, Memc[intab], SZ_FNAME) != EOF) { + + # These may be modified within this loop by th_limits. + nbins = t_nbins + vlow = t_vlow + vhigh = t_vhigh + dx = t_dx + clow = t_clow + chigh = t_chigh + + itp = tbtopn (Memc[intab], READ_ONLY, NULL) + call tbcfnd (itp, colname, cptr, 1) + if (cptr == NULL) { + call tbtclo (itp) + call eprintf ("column not found in %s\n") + call pargstr (Memc[intab]) + if ( ! listout ) # skip next output table + junk = tbnget (list2, Memc[outtab], SZ_FNAME) + next + } + + # Get lower & upper limits for the histogram. + iferr { + call th_limits (itp, cptr, Memc[range_string], + nbins, vlow, vhigh, dx, clow, chigh, + got, find_datamin, find_datamax) + } then { + call erract (EA_WARN) + call eprintf ("Table `%s' will be skipped.\n") + call pargstr (Memc[intab]) + call tbtclo (itp) + next + } + + # Get scratch space for the histogram. + call malloc (val, nbins, TY_DOUBLE) + call malloc (counts, nbins, TY_INT) + + # Make the histogram. + call th_mk_hist (itp, cptr, nbins, vlow, vhigh, dx, + Memc[range_string], Memd[val], Memi[counts], nrows) + + if ( listout ) { + call printf ("# %d rows %s\n") + call pargi (nrows) + call pargstr (Memc[intab]) + do i = 1, nbins { + call printf ("%15.7g %8d\n") + call pargd (Memd[val+i-1]) + call pargi (Memi[counts+i-1]) + } + } else { + + # Create output table & define columns. + junk = tbnget (list2, Memc[outtab], SZ_FNAME) + call tbfpri (Memc[intab], Memc[outtab], phu_copied) + otp = tbtopn (Memc[outtab], NEW_FILE, NULL) + call tbcdef (otp, ocpx, + outcolx, "", "", TY_DOUBLE, 1, 1) + call tbcdef (otp, ocpy, + outcoly, "histogram", "", TY_INT, 1, 1) + call tbtcre (otp) + + # Put info records in the header. + call tbhadt (otp, "intable", Memc[intab]) + call tbhadt (otp, "colname", colname) + call tbhadi (otp, "nrows", nrows) + + # Write the values into the output table, and close it. + call tbcptd (otp, ocpx, Memd[val], 1, nbins) + call tbcpti (otp, ocpy, Memi[counts], 1, nbins) + call tbtclo (otp) + } + call tbtclo (itp) + + call mfree (counts, TY_INT) + call mfree (val, TY_DOUBLE) + } + call tbnclose (list1) + if ( ! listout ) + call tbnclose (list2) + call sfree (sp) +end + +# th_limits -- get limits for histogram +# This routine determines the lower and upper limits of data values for +# making a histogram. If either of the input values v1 or v2 is not INDEF +# (i.e. if it was specified by the user), then that value is returned as +# vlow or vhigh respectively. If either or both are INDEF the minimum and +# maximum values in the table column are gotten, and the minimum and maximum +# are extended a little to ensure that the endpoints are included in the +# histogram. The range is extended by (max - min) / (nbins - 1) / 2 +# on each end. The parameters nbins, vlow, vhigh, and dx may be updated. + +procedure th_limits (tp, cptr, range_str, + nbins, vlow, vhigh, dx, clow, chigh, + got, find_datamin, find_datamax) + +pointer tp # i: ptr to table descriptor +pointer cptr # i: ptr to column descriptor +char range_str[ARB] # i: range of row numbers +int nbins # io: number of bins +double vlow, vhigh # io: lower and upper limits +double dx # io: bin width +double clow, chigh # i: centers of low and high bins +bool got[NPAR] # i: flags to specify what we have got +bool find_datamin # i: true if we need to find minimum data value +bool find_datamax # i: true if we need to find maximum data value +#-- +double value # an element gotten from the table +double vmin, vmax # min & max values in the column +int nrows # number of rows in table +int row # row number +int ranges[3,MAX_RANGES] # ranges of row numbers +int nvalues # returned by decode_ranges and ignored +int stat # returned by get_next_number +bool done +int decode_ranges(), get_next_number(), tbpsta() + +begin + if (find_datamin || find_datamax) { + + # We must determine either the minimum or maximum or both. + + if (decode_ranges (range_str, ranges, MAX_RANGES, nvalues) != OK) + call error (1, "bad range of row numbers") + nrows = tbpsta (tp, TBL_NROWS) + + # First get initial values for min & max in column. We can't just + # take the first value because it might be INDEF. + row = 0 # initialize get_next_number + stat = get_next_number (ranges, row) # get first row number + done = (stat == EOF) || (row > nrows) + while ( ! done ) { + call tbegtd (tp, cptr, row, value) + if ( IS_INDEFD(value) ) { + # get next row number + stat = get_next_number (ranges, row) + if ((stat == EOF) || (row > nrows)) + call error (1, "all values are INDEF") + } else { + vmin = value + vmax = value + done = true + } + } + + # Update min & max values. + stat = get_next_number (ranges, row) # get next row number + done = (stat == EOF) || (row > nrows) + while ( ! done ) { + call tbegtd (tp, cptr, row, value) + if ( !IS_INDEFD(value) ) { + if (value < vmin) + vmin = value + if (value > vmax) + vmax = value + } + stat = get_next_number (ranges, row) # get next row number + if ((stat == EOF) || (row > nrows)) + done = true + } + + # Update parameter values. + call th_update (vmin, vmax, nbins, vlow, vhigh, dx, clow, chigh, + got, find_datamin, find_datamax) + } +end + +# th_mk_hist -- make the histogram + +procedure th_mk_hist (tp, cptr, nbins, vlow, vhigh, dx, range_str, + val, counts, nrows) + +pointer tp # i: ptr to table descriptor +pointer cptr # i: ptr to column descriptor +int nbins # i: number of bins +double vlow # i: min value for histogram +double vhigh # i: max value for histogram +double dx # i: bin width +char range_str[ARB] # i: range of row numbers +double val[nbins] # o: array of values at center of bins +int counts[nbins] # o: the histogram, array of counts within bins +int nrows # o: number of rows in range_str and value within limits +#-- +double value # an element gotten from the table +int bin # bin number for output +int totalrows # total number of rows in table +int row # row number +int i +int ranges[3,MAX_RANGES] # ranges of row numbers +int nvalues # returned by decode_ranges and ignored +int stat # returned by get_next_number +bool done +int decode_ranges(), get_next_number(), tbpsta() + +begin + totalrows = tbpsta (tp, TBL_NROWS) + + # Initialize the range of row numbers. + if (decode_ranges (range_str, ranges, MAX_RANGES, nvalues) != OK) + call error (1, "bad range of row numbers") + + do i = 1, nbins { + val[i] = vlow + (i - 0.5d0) * dx # value at center of bin + counts[i] = 0 # initialize histogram + } + + nrows = 0 # initialize counter + row = 0 # initialize get_next_number + stat = get_next_number (ranges, row) # get first row number + done = (stat == EOF) || (row > totalrows) + while ( ! done ) { + call tbegtd (tp, cptr, row, value) + if ( ! IS_INDEF(value) ) { + if (value >= vlow && value < vhigh) { + if (dx > 0.d0) + bin = int ((value - vlow) / dx) + 1 + else + bin = 1 + counts[bin] = counts[bin] + 1 + nrows = nrows + 1 + } + } + stat = get_next_number (ranges, row) # get next row number + done = (stat == EOF) || (row > totalrows) + } +end diff --git a/pkg/utilities/nttools/tstat/thoptions.x b/pkg/utilities/nttools/tstat/thoptions.x new file mode 100644 index 00000000..5b9ce639 --- /dev/null +++ b/pkg/utilities/nttools/tstat/thoptions.x @@ -0,0 +1,343 @@ +include "thistogram.h" # defines NPAR, etc. + +# This file contains th_options and th_update. +# +# Phil Hodge, 18-Mar-1994 Subroutines created. + +# th_options -- different options for specifying limits +# There are different ways to specify the limits and bin spacing. This +# routine computes some parameters given others, based on the following: +# +# dx = (vhigh - vlow) / nbins +# dx = (chigh - clow) / (nbins - 1) +# clow = vlow + dx / 2 +# chigh = vhigh - dx / 2 +# +# Note that vlow and vhigh correspond to task parameters lowval and highval. + +procedure th_options (nbins, vlow, vhigh, dx, clow, chigh, + got, find_datamin, find_datamax) + +int nbins # io: number of bins +double vlow, vhigh # io: lower and upper limits +double dx # io: bin width +double clow, chigh # io: centers of low and high bins +bool got[NPAR] # o: flags to specify what we have got +bool find_datamin # o: true if we need to find minimum data value +bool find_datamax # o: true if we need to find maximum data value +#-- +bool fp_equald() + +begin + # These flags will be reset below if we determine the values. + got[NBINS] = !IS_INDEFI(nbins) + got[VLOW] = !IS_INDEFD(vlow) + got[VHIGH] = !IS_INDEFD(vhigh) + got[DX] = !IS_INDEFD(dx) + got[CLOW] = !IS_INDEFD(clow) + got[CHIGH] = !IS_INDEFD(chigh) + + # Check whether low value is greater than high value. + if (got[VLOW] && got[VHIGH]) + if (vlow > vhigh) + call error (1, "lowval must not be larger than highval") + if (got[CLOW] && got[CHIGH]) + if (clow > chigh) + call error (1, "clow must not be larger than chigh") + + # Further checking. + if (got[VLOW] && got[CLOW]) + if (vlow > clow) + call error (1, "lowval must not be larger than clow") + if (got[CHIGH] && got[VHIGH]) + if (chigh > vhigh) + call error (1, "chigh must not be larger than highval") + if (got[VLOW] && got[CHIGH]) + if (vlow > chigh) + call error (1, "lowval must not be larger than chigh") + if (got[CLOW] && got[VHIGH]) + if (clow > vhigh) + call error (1, "clow must not be larger than highval") + + # Set flags to specify what (if anything) we must get from the data. + # These may be reset below. + find_datamin = (!got[VLOW] && !got[CLOW]) + find_datamax = (!got[VHIGH] && !got[CHIGH]) + + if (got[DX]) { + + # Was the lower limit specified by the user? + if (got[CLOW]) { + if (got[VLOW]) { + if (!fp_equald (clow, vlow + dx/2.d0)) + call error (1, "values of dx, clow, lowval conflict") + } else { + vlow = clow - dx / 2.d0 + got[VLOW] = true + } + } else if (got[VLOW]) { + clow = vlow + dx / 2.d0 + got[CLOW] = true + } + + # Was the upper limit specified? + if (got[CHIGH]) { + if (got[VHIGH]) { + if (!fp_equald (chigh, vhigh - dx/2.d0)) + call error (1, "values of dx, chigh, highval conflict") + } else { + vhigh = chigh + dx / 2.d0 + got[VHIGH] = true + } + } else if (got[VHIGH]) { + chigh = vhigh - dx / 2.d0 + got[CHIGH] = true + } + + # Was the number of bins specified? + if (got[NBINS]) { + if (got[VLOW] && got[VHIGH]) { + if (!fp_equald (vhigh - vlow, dx * nbins)) + call error (1, "specified values for limits conflict") + } else if (got[VLOW]) { + vhigh = vlow + dx * nbins + chigh = vhigh - dx / 2.d0 + got[VHIGH] = true + got[CHIGH] = true + find_datamax = false + } else if (got[VHIGH]) { + vlow = vhigh - dx * nbins + clow = vlow + dx / 2.d0 + got[VLOW] = true + got[CLOW] = true + find_datamin = false + } + + } else if (got[VLOW] && got[VHIGH]) { + nbins = nint ((vhigh - vlow) / dx) + if (nbins * dx < vhigh - vlow) + nbins = nbins + 1 # round up + got[NBINS] = true + } + + } else if (got[NBINS]) { # but we don't have dx + + if (nbins == 1) { + if (!got[VLOW] && !got[VHIGH] && (got[CLOW] || got[CHIGH])) { + call eprintf ( + "nbins = 1, clow or chigh was specified, but dx was not.\n") + call error (1, "must specify dx for this case") + } + } + + if (got[VLOW] && got[VHIGH]) { + + dx = (vhigh - vlow) / double(nbins) + got[DX] = true + if (got[CLOW]) { + if (!fp_equald (clow, vlow + dx/2.d0)) + call error (1, "clow conflicts with other parameters") + } else { + clow = vlow + dx / 2.d0 + got[CLOW] = true + } + if (got[CHIGH]) { + if (!fp_equald (chigh, vhigh - dx/2.d0)) + call error (1, "chigh conflicts with other parameters") + } else { + chigh = vhigh - dx / 2.d0 + got[CHIGH] = true + } + + } else if (got[CLOW] && got[CHIGH]) { + + if (nbins == 1) { + if (!fp_equald (clow, chigh)) + call error (1, "nbins = 1, but clow != chigh") + } else { + dx = (chigh - clow) / (double(nbins) - 1.d0) + got[DX] = true + if (got[VLOW]) { + if (!fp_equald (vlow, clow - dx/2.d0)) + call error (1, + "lowval conflicts with other parameters") + } else { + vlow = clow - dx / 2.d0 + got[VLOW] = true + } + if (got[VHIGH]) { + if (!fp_equald (vhigh, chigh + dx/2.d0)) + call error (1, + "highval conflicts with other parameters") + } else { + vhigh = chigh + dx / 2.d0 + got[VHIGH] = true + } + } + + } else if (got[CLOW] && got[VLOW]) { + + dx = (clow - vlow) * 2.d0 + vhigh = vlow + dx * nbins + chigh = vhigh - dx / 2.d0 + got[DX] = true + got[VHIGH] = true + got[CHIGH] = true + find_datamax = false + + } else if (got[CHIGH] && got[VHIGH]) { + + dx = (vhigh - chigh) * 2.d0 + vlow = vhigh - dx * nbins + clow = vlow + dx / 2.d0 + got[DX] = true + got[VLOW] = true + got[CLOW] = true + find_datamin = false + + } else if (got[CLOW] && got[VHIGH]) { + + dx = (vhigh - clow) / (double(nbins) - 0.5d0) + vlow = vhigh - dx * nbins + chigh = vhigh - dx / 2.d0 + got[DX] = true + got[VLOW] = true + got[CHIGH] = true + + } else if (got[VLOW] && got[CHIGH]) { + + dx = (chigh - vlow) / (double(nbins) - 0.5d0) + clow = vlow + dx / 2.d0 + vhigh = vlow + dx * nbins + got[DX] = true + got[CLOW] = true + got[VHIGH] = true + + } + + } else if (got[CLOW] && got[VLOW]) { # but neither dx nor nbins + + dx = (clow - vlow) * 2.d0 + got[DX] = true + + } else if (got[CHIGH] && got[VHIGH]) { # but neither dx nor nbins + + dx = (vhigh - chigh) * 2.d0 + got[DX] = true + + } else { + call error (1, "you must specify either nbins or dx (or both)") + } +end + +# th_update -- update the limits +# We now have the minimum and maximum data values from the table, +# so we can fill in the values that the user did not specify. +# We need nbins, vlow, vhigh, and dx. The values of clow and chigh +# are not modified, even if they are still INDEF; the array of flags +# (got) must not be modified, as these flags are used for different +# tables if there is more than one in the input list. + +procedure th_update (vmin, vmax, nbins, vlow, vhigh, dx, clow, chigh, + got, find_datamin, find_datamax) + +double vmin, vmax # i: min and max data values for current table +int nbins # io: number of bins +double vlow, vhigh # io: lower and upper limits +double dx # io: bin width +double clow, chigh # i: centers of low and high bins +bool got[NPAR] # i: flags that specify what parameters we already have +bool find_datamin # i: true if we had to find minimum data value +bool find_datamax # i: true if we had to find maximum data value +#-- +double delta # for expanding the range to include endpoints + +begin + if (find_datamin && find_datamax) { + + # Center the data within the range. We do have either + # dx or nbins or both. + if (got[DX] && got[NBINS]) { + delta = (dx * nbins - (vmax - vmin)) / 2.d0 + } else if (got[NBINS]) { + if (nbins == 1) + delta = (vmax - vmin) / 100.d0 # 100 is arbitrary + else + delta = (vmax - vmin) / (nbins - 1.d0) / 2.d0 + } else if (got[DX]) { + nbins = nint ((vmax - vmin) / dx) + if (nbins * dx <= vmax - vmin) + nbins = nbins + 1 # round up + delta = (dx * nbins - (vmax - vmin)) / 2.d0 + } + vlow = vmin - delta + vhigh = vmax + delta + if (!got[DX]) + dx = (vhigh - vlow) / nbins + + } else if (find_datamin) { + + # We don't have both dx and nbins. If we did, we would have + # calculated vlow from vhigh, dx, and nbins. + if (got[DX]) { + + nbins = nint ((vhigh - vmin) / dx) # vhigh is known + if (nbins * dx <= vhigh - vmin) + nbins = nbins + 1 # round up + vlow = vhigh - dx * nbins + + } else if (got[NBINS]) { + + if (got[VHIGH]) { + if (nbins == 1) + delta = (vhigh - vmin) / 100.d0 # 100 is arbitrary + else + delta = (vhigh - vmin) / (nbins - 1.d0) / 2.d0 + vlow = vmin - delta + dx = (vhigh - vlow) / nbins + } else { # we have chigh but not vhigh + if (nbins == 1) { + vlow = vmin # this case doesn't make sense + vhigh = chigh + (chigh - vmin) + dx = vhigh - vlow + } else { # set clow = vmin + dx = (chigh - vmin) / (nbins - 1.d0) + vlow = vmin - dx / 2.d0 + vhigh = chigh + dx / 2.d0 + } + } + } + + } else if (find_datamax) { + + # For this case as well, we don't have both dx and nbins. + if (got[DX]) { + + nbins = nint ((vmax - vlow) / dx) # vlow is known + if (nbins * dx <= vmax - vlow) + nbins = nbins + 1 # round up + vhigh = vlow + dx * nbins + + } else if (got[NBINS]) { + + if (got[VLOW]) { + if (nbins == 1) + delta = (vmax - vlow) / 100.d0 # 100 is arbitrary + else + delta = (vmax - vlow) / (nbins - 1.d0) / 2.d0 + vhigh = vmax + delta + dx = (vhigh - vlow) / nbins + } else { # we have clow but not vlow + if (nbins == 1) { + vhigh = vmax # this case doesn't make sense + vlow = clow - (vmax - clow) + dx = vhigh - vlow + } else { # set chigh = vmax + dx = (vmax - clow) / (nbins - 1.d0) + vlow = vmin - dx / 2.d0 + vhigh = chigh + dx / 2.d0 + } + } + } + } +end diff --git a/pkg/utilities/nttools/tstat/tstat.x b/pkg/utilities/nttools/tstat/tstat.x new file mode 100644 index 00000000..5ef2b435 --- /dev/null +++ b/pkg/utilities/nttools/tstat/tstat.x @@ -0,0 +1,465 @@ +include <error.h> +include <fset.h> # to check whether input or output is redirected +include <tbset.h> + +define MAX_RANGES (SZ_LINE/2) # max number of ranges of row numbers +define NUM_COL 8 # number of output columns + +# tstat -- get statistics for a table column +# This task gets the mean, standard deviation, median, and minimum & +# maximum values for a table column. +# +# Phil Hodge, 8-Dec-1988 Task created. +# Phil Hodge, 14-Mar-1989 Also compute the median; fix bug in std dev. +# Phil Hodge, 27-May-1992 Print INDEF if no values in range. +# Phil Hodge, 31-Jul-1992 Print column name after table name. +# Phil Hodge, 11-Jan-1992 Use asrtd instead of asokd for median if nr is even. +# Phil Hodge, 3-Oct-1995 Modify to use tbn instead of fnt. +# Phil Hodge, 25-Apr-1997 Use asrtd instead of asokd regardless of nr. +# Phil Hodge, 26-Mar-1998 Get all elements of array columns; ignore the +# column parameter if an input table has only one column. +# Phil Hodge, 8-Jun-1999 Set input/output to STDIN/STDOUT if redirected. +# Phil Hodge, 2-Jan-2001 If the input was redirected, and one command-line +# argument was specified, take that argument to be the +# column name rather than the table name. + +procedure tstat() + +pointer inlist # scratch for list of input table names +char cl_colname[SZ_COLNAME] # column name gotten from cl parameter +double lowlim # lower & upper limits for histogram +double highlim # lower & upper limits for histogram +pointer range_string # string which gives ranges of row numbers +char nam_table[SZ_FNAME] # column name for table name +char nam_name[SZ_COLNAME] # column name for column name +char nam_mean[SZ_COLNAME] # column name for mean +char nam_stddev[SZ_COLNAME] # column name for standard deviation +char nam_med[SZ_COLNAME] # column name for median +char nam_min[SZ_COLNAME] # column name for minimum +char nam_max[SZ_COLNAME] # column name for maximum +char nam_nrows[SZ_COLNAME] # column name for number of good rows +#-- +pointer list1 # for list of input tables +pointer sp +pointer itp, otp # ptr to table descriptor +pointer cptr # ptr to column descriptor +pointer ocp[NUM_COL] # ptrs to col descriptors for output columns +pointer intab, outtab # scr for names of input & output tables +char colname[SZ_COLNAME] # column name +double vmean # mean value +double vstddev # standard deviation of values +double vmedian # median value +double vmin, vmax # minimum & maximum values +int row # output row number +int nrows # number of rows included and not INDEF +int ntables # number of tables in input list +bool listout # ASCII output? +bool tabout # table output? +bool new_table # true if output table does not already exist +int nargs # number of command-line arguments +bool in_redir # is input redirected? +int clgeti() +double clgetd() +int fstati() +pointer tbnopen() +int tbnget(), tbnlen() +pointer tbtopn() +pointer tbcnum() +int tbtacc(), tbpsta() +bool streq() + +begin + # Allocate scratch for lists of names and for table names. + call smark (sp) + call salloc (inlist, SZ_LINE, TY_CHAR) + call salloc (intab, SZ_LINE, TY_CHAR) + call salloc (outtab, SZ_LINE, TY_CHAR) + call salloc (range_string, SZ_LINE, TY_CHAR) + + # Get task parameters. + + nargs = clgeti ("$nargs") + in_redir = fstati (STDIN, F_REDIR) == YES + + if (in_redir) + call strcpy ("STDIN", Memc[inlist], SZ_LINE) + else + call clgstr ("intable", Memc[inlist], SZ_LINE) + + if (fstati (STDOUT, F_REDIR) == YES) + call strcpy ("STDOUT", Memc[outtab], SZ_LINE) + else + call clgstr ("outtable", Memc[outtab], SZ_LINE) + + cl_colname[1] = EOS # initial value + lowlim = clgetd ("lowlim") # these limits may be INDEF + highlim = clgetd ("highlim") + call clgstr ("rows", Memc[range_string], SZ_LINE) + + # ASCII output? table output? create a new output table? + listout = streq (Memc[outtab], "STDOUT") + if ( listout || (Memc[outtab] == EOS) ) { + tabout = false + } else { + tabout = true + new_table = (tbtacc (Memc[outtab]) == NO) + } + + if (tabout) { + call clgstr ("n_tab", nam_table, SZ_FNAME) + call clgstr ("n_nam", nam_name, SZ_COLNAME) + call clgstr ("n_nrows", nam_nrows, SZ_COLNAME) + call clgstr ("n_mean", nam_mean, SZ_COLNAME) + call clgstr ("n_stddev", nam_stddev, SZ_COLNAME) + call clgstr ("n_median", nam_med, SZ_COLNAME) + call clgstr ("n_min", nam_min, SZ_COLNAME) + call clgstr ("n_max", nam_max, SZ_COLNAME) + } + + # Expand the input table list. + list1 = tbnopen (Memc[inlist]) + + ntables = tbnlen (list1) + + if (listout) { + row = 0 # just to have a definite value + if (ntables > 1) { + call printf ("# nrows") + call printf (" mean stddev median") + call printf (" min max\n\n") + } + } else if (tabout) { + # Create output table (or open existing table) & define columns. + if (new_table) + otp = tbtopn (Memc[outtab], NEW_FILE, NULL) + else + otp = tbtopn (Memc[outtab], READ_WRITE, NULL) + + call tbcfnd (otp, nam_table, ocp[1], 1) + call tbcfnd (otp, nam_name, ocp[2], 1) + call tbcfnd (otp, nam_nrows, ocp[3], 1) + call tbcfnd (otp, nam_mean, ocp[4], 1) + call tbcfnd (otp, nam_stddev, ocp[5], 1) + call tbcfnd (otp, nam_med, ocp[6], 1) + call tbcfnd (otp, nam_min, ocp[7], 1) + call tbcfnd (otp, nam_max, ocp[8], 1) + + if (ocp[1] == NULL) + call tbcdef (otp, ocp[1], + nam_table, "", "", -SZ_FNAME, 1, 1) + if (ocp[2] == NULL) + call tbcdef (otp, ocp[2], + nam_name, "", "", -SZ_COLNAME, 1, 1) + if (ocp[3] == NULL) + call tbcdef (otp, ocp[3], + nam_nrows, "", "", TY_INT, 1, 1) + if (ocp[4] == NULL) + call tbcdef (otp, ocp[4], + nam_mean, "", "", TY_DOUBLE, 1, 1) + if (ocp[5] == NULL) + call tbcdef (otp, ocp[5], + nam_stddev, "", "", TY_DOUBLE, 1, 1) + if (ocp[6] == NULL) + call tbcdef (otp, ocp[6], + nam_med, "", "", TY_DOUBLE, 1, 1) + if (ocp[7] == NULL) + call tbcdef (otp, ocp[7], + nam_min, "", "", TY_DOUBLE, 1, 1) + if (ocp[8] == NULL) + call tbcdef (otp, ocp[8], + nam_max, "", "", TY_DOUBLE, 1, 1) + if (new_table) + call tbtcre (otp) + + row = tbpsta (otp, TBL_NROWS) # append to whatever is there + } + + # Do for each input table. + while (tbnget (list1, Memc[intab], SZ_LINE) != EOF) { + + itp = tbtopn (Memc[intab], READ_ONLY, NULL) + if (tbpsta (itp, TBL_NCOLS) == 1) { + cptr = tbcnum (itp, 1) + call tbcigt (cptr, TBL_COL_NAME, colname, SZ_COLNAME) + } else { + if (cl_colname[1] == EOS) { + if (nargs == 1 && in_redir) { + # in this case, the one argument should be column name + call clgstr ("intable", cl_colname, SZ_COLNAME) + # update par file + call clpstr ("intable", "STDIN") + call clpstr ("column", cl_colname) + } else { + call clgstr ("column", cl_colname, SZ_COLNAME) + } + } + call strcpy (cl_colname, colname, SZ_COLNAME) + call tbcfnd (itp, colname, cptr, 1) + } + if (cptr == NULL) { + call tbtclo (itp) + call eprintf ("column %s not found in %s\n") + call pargstr (colname) + call pargstr (Memc[intab]) + next + } + row = row + 1 + + if (listout) { + call printf ("# %s %s\n") + call pargstr (Memc[intab]) + call pargstr (colname) + if (ntables == 1) { + call printf ("# nrows") + call printf (" mean stddev median") + call printf (" min max\n") + } + } + + # Get statistics. + iferr { + call ts_values (itp, cptr, Memc[range_string], lowlim, highlim, + vmean, vstddev, vmedian, vmin, vmax, nrows) + } then { + call erract (EA_WARN) + next + } + + if (listout) { + + call printf ("%5d %17.10g %13.6g %13.6g %13.6g %13.6g\n") + call pargi (nrows) + call pargd (vmean) + call pargd (vstddev) + call pargd (vmedian) + call pargd (vmin) + call pargd (vmax) + + } else if (tabout) { + + # Write the values into the output table. + call tbeptt (otp, ocp[1], row, Memc[intab]) + call tbeptt (otp, ocp[2], row, colname) + call tbepti (otp, ocp[3], row, nrows) + call tbeptd (otp, ocp[4], row, vmean) + call tbeptd (otp, ocp[5], row, vstddev) + call tbeptd (otp, ocp[6], row, vmedian) + call tbeptd (otp, ocp[7], row, vmin) + call tbeptd (otp, ocp[8], row, vmax) + } + + call tbtclo (itp) # close current input table + } + + # Close the output table. It may be that nothing was written to it. + if (tabout) + call tbtclo (otp) + + # Save the results (from the last input table) in cl parameters. + call clputi ("nrows", nrows) + call clputd ("mean", vmean) + call clputd ("stddev", vstddev) + call clputd ("median", vmedian) + call clputd ("vmin", vmin) + call clputd ("vmax", vmax) + + call tbnclose (list1) + call sfree (sp) +end + +# ts_values -- get statistics for a table column +# This routine gets the mean, standard deviation, minimum and maximum +# values for a table column. If lower and/or upper cutoff limits were +# specified (i.e. are not INDEF) then values outside that range will not +# be included in the statistics. INDEF values are also not included. + +procedure ts_values (tp, cptr, range_str, lowlim, highlim, + vmean, vstddev, vmedian, vmin, vmax, nrows) + +pointer tp # i: ptr to table descriptor +pointer cptr # i: ptr to column descriptor +char range_str[ARB] # i: range of row numbers +double lowlim # i: lower cutoff of values to be included +double highlim # i: upper cutoff of values to be included +double vmean # o: mean value +double vstddev # o: standard deviation of values +double vmedian # o: median value +double vmin # o: minimum value +double vmax # o: maximum value +int nrows # o: number of rows included in the statistics +#-- +pointer sp +pointer val # scratch for array of values (for median) +pointer descrip # column selector descriptor +double value # an element gotten from the table +double sum, sumsq # for accumulating sums +double smin, smax # temp min & max +double diff # current value minus first good value +int all_elem # total number of elements in column +int selrows # number of selected rows +int nelem # number of elements in one cell +int nret # number returned (ignored) +int nr # current number of rows +int row # row number +int ranges[3,MAX_RANGES] # ranges of row numbers +int nvalues # returned by decode_ranges and ignored +int stat # returned by get_next_number +int i, j # loop indexes +bool chklow, chkhi # were low (high) limits specified? +bool val_ok # is current value within limits? +bool done # loop-termination flag +int decode_ranges(), get_next_number() +int tbagtd() +errchk tbagtd, tbegtd, tcs_rdaryd + +begin + if (decode_ranges (range_str, ranges, MAX_RANGES, nvalues) != OK) { + call eprintf ("rows = `%s'\n") + call pargstr (range_str) + call error (1, "Range of row numbers is invalid.") + } + + # Get the number of elements per table cell and the number of + # rows selected using row-selector syntax (as opposed to using + # the 'rows' task parameter). + call tbcnel1 (tp, cptr, descrip, nelem, selrows) + + # Find out how many rows there are in the table, restricted by + # either a row selector or the 'rows' task parameter, or both. + if (range_str[1] == '-' || range_str[1] == EOS) { + all_elem = selrows * nelem + } else { + # Count the number of rows specified. + i = 0 # count of row numbers + row = 0 # initialize get_next_number + done = false + while (!done) { + stat = get_next_number (ranges, row) + if (stat == EOF || row > selrows) + done = true + else + i = i + 1 + } + all_elem = i * nelem + } + + # Allocate scratch space to hold an entire column. + call smark (sp) + call salloc (val, all_elem, TY_DOUBLE) + + row = 0 # reinitialize get_next_number + stat = get_next_number (ranges, row) + done = (stat == EOF || row > selrows) + + # Get the data. + i = 1 + while (!done) { + + if (descrip == NULL) { + if (nelem == 1) + call tbegtd (tp, cptr, row, Memd[val+i-1]) + else + nret = tbagtd (tp, cptr, row, Memd[val+i-1], 1, nelem) + } else { + call tcs_rdaryd (tp, descrip, row, all_elem-i+1, + nret, Memd[val+i-1]) + } + + i = i + nelem + + stat = get_next_number (ranges, row) + done = (stat == EOF || row > selrows) + } + if (all_elem != i - 1) + call error (1, "not all elements read from column") + + chklow = ! IS_INDEFD(lowlim) + chkhi = ! IS_INDEFD(highlim) + + # Check which values to include. + sum = 0.d0 + nr = 0 + do i = 0, all_elem-1 { # zero indexed + value = Memd[val+i] + if (!IS_INDEFD(value)) { + val_ok = true # an initial value + if (chkhi && (value > highlim)) { + val_ok = false + Memd[val+i] = INDEFD + } + if (chklow && (value < lowlim)) { + val_ok = false + Memd[val+i] = INDEFD + } + if (val_ok) { + nr = nr + 1 + sum = sum + value + } + } + } + + if (nr < 1) { + # No rows with valid data, so set the output to INDEF. + nrows = 0 + vmean = INDEFD + vstddev = INDEFD + vmedian = INDEFD + vmin = INDEFD + vmax = INDEFD + return + } else { + # Assign two of the output values. + nrows = nr + vmean = sum / double(nr) + } + + # Move good data to the beginning of the array. + if (nr < all_elem) { + j = 0 + do i = 0, all_elem-1 { # zero indexed + if (!IS_INDEFD(Memd[val+i])) { + Memd[val+j] = Memd[val+i] + j = j + 1 + } + } + } + + # Find the min, max, standard deviation of the good values. + sumsq = 0.d0 + smin = Memd[val] + smax = Memd[val] + do i = 0, nr-1 { # zero indexed + value = Memd[val+i] + diff = value - vmean + sumsq = sumsq + diff * diff + if (value < smin) + smin = value + if (value > smax) + smax = value + } + + # Assign output values. + + vmin = smin + vmax = smax + + if (nr > 1) + vstddev = sqrt (sumsq / double(nr-1)) + else + vstddev = INDEFD + + # Determine the median. If there are an even number of values, the + # middle two are averaged. + if (nr < 3) { + vmedian = vmean + } else { + call asrtd (Memd[val], Memd[val], nr) + if (nr / 2 * 2 == nr) { # even number of values? + vmedian = (Memd[val+nr/2-1] + Memd[val+nr/2]) / 2.d0 + } else { + vmedian = Memd[val+nr/2] + } + } + + call sfree (sp) +end |