diff options
Diffstat (limited to 'pkg/utilities/nttools/tjoin')
-rw-r--r-- | pkg/utilities/nttools/tjoin/closeiotab.x | 22 | ||||
-rw-r--r-- | pkg/utilities/nttools/tjoin/dojoin.x | 97 | ||||
-rw-r--r-- | pkg/utilities/nttools/tjoin/freetol.x | 15 | ||||
-rw-r--r-- | pkg/utilities/nttools/tjoin/isnumber.x | 35 | ||||
-rw-r--r-- | pkg/utilities/nttools/tjoin/issame.x | 127 | ||||
-rw-r--r-- | pkg/utilities/nttools/tjoin/mkjoin.x | 106 | ||||
-rw-r--r-- | pkg/utilities/nttools/tjoin/mkpkg | 23 | ||||
-rw-r--r-- | pkg/utilities/nttools/tjoin/openitab.x | 82 | ||||
-rw-r--r-- | pkg/utilities/nttools/tjoin/openotab.x | 91 | ||||
-rw-r--r-- | pkg/utilities/nttools/tjoin/readtol.x | 55 | ||||
-rw-r--r-- | pkg/utilities/nttools/tjoin/removejcol.x | 43 | ||||
-rw-r--r-- | pkg/utilities/nttools/tjoin/renamecol.x | 109 | ||||
-rw-r--r-- | pkg/utilities/nttools/tjoin/spptype.x | 29 | ||||
-rw-r--r-- | pkg/utilities/nttools/tjoin/tjoin.h | 27 | ||||
-rw-r--r-- | pkg/utilities/nttools/tjoin/tjoin.x | 124 |
15 files changed, 985 insertions, 0 deletions
diff --git a/pkg/utilities/nttools/tjoin/closeiotab.x b/pkg/utilities/nttools/tjoin/closeiotab.x new file mode 100644 index 00000000..8d9ff7df --- /dev/null +++ b/pkg/utilities/nttools/tjoin/closeiotab.x @@ -0,0 +1,22 @@ +include "tjoin.h" + +# B.Simon 16-Apr-99 first code + +# CLOSE_IOTAB -- Close table and release data structure describing it + +procedure close_iotab (tj) + +pointer tj # i: Data structure describing table +#-- + +begin + call tbtclo (TJ_TAB(tj)) + + if (TJ_JPTR(tj) != NULL) + call mfree (TJ_JPTR(tj), TY_INT) + + if (TJ_DPTR(tj) != NULL) + call mfree (TJ_DPTR(tj), TY_INT) + + call mfree (tj, TY_INT) +end diff --git a/pkg/utilities/nttools/tjoin/dojoin.x b/pkg/utilities/nttools/tjoin/dojoin.x new file mode 100644 index 00000000..133a2721 --- /dev/null +++ b/pkg/utilities/nttools/tjoin/dojoin.x @@ -0,0 +1,97 @@ +include <tbset.h> +include "tjoin.h" + +# DOJOIN -- Compute the relational join of two tables +# +# B.Simon 03-Nov-87 First Code +# B.Simon 16-Dec-87 Changed to handle table subsets +# B.Simon 06-Feb-90 Changed to use tbtsrt +# B.Simon 06-Feb-90 Revised to do outer joins + +procedure dojoin (tj1, tj2, tjo, tol, extra, casesens) + +pointer tj1 # i: Table info descriptor for first input table +pointer tj2 # i: Table info descriptor for second input table +pointer tjo # i: Table info descriptor for output table +pointer tol # i: Descriptor of vector of tolerance values +int extra # i: Include non-joined columns in output +bool casesens # i: Join is case sensitive +#-- +int nrow1, nrow2, irow, jrow, krow +pointer match1, match2 + +bool is_same() +int tbpsta() + +begin + # Allocate arrays to hold unmatched rows + # in case extrarows is set + + nrow1 = tbpsta (TJ_TAB(tj1), TBL_NROWS) + nrow2 = tbpsta (TJ_TAB(tj2), TBL_NROWS) + + if (extra > 0) { + call calloc (match1, nrow1, TY_INT) + call calloc (match2, nrow2, TY_INT) + } + + # Naive approach to join compares every row in first table + # to second. This is slower than sorting first (N^2 vs. N log N) + # but the code is much simpler, especially with the extra + # problem of joining on row number and supporting inner and + # outer joins. + + krow = 1 + do irow = 1, nrow1 { + do jrow = 1, nrow2 { + # Equality test includes case insensitive string matches + # and fuzzy matching for numbers + + if (is_same (tj1, tj2, irow, jrow, tol, casesens)) { + # If match, write rows to output table + + call tbrcsc (TJ_TAB(tj1), TJ_TAB(tjo), TJ_DCOL(tj1,1), + TJ_DCOL(tjo,1), irow, krow, TJ_DNUM(tj1)) + call tbrcsc (TJ_TAB(tj2), TJ_TAB(tjo), TJ_DCOL(tj2,1), + TJ_DCOL(tjo,TJ_DNUM(tj1)+1), jrow, krow, + TJ_DNUM(tj2)) + + if (extra > 0) { + Memi[match1+irow-1] = jrow + Memi[match2+jrow-1] = irow + } + + krow = krow + 1 + } + } + } + + # Write the extra rows to the output table + + if (extra >= 1) { + do irow = 1, nrow1 { + if (Memi[match1+irow-1] == 0) { + call tbrcsc (TJ_TAB(tj1), TJ_TAB(tjo), TJ_DCOL(tj1,1), + TJ_DCOL(tjo,1), irow, krow, TJ_DNUM(tj1)) + krow = krow + 1 + } + } + } + + if (extra == 2) { + do jrow = 1, nrow2 { + if (Memi[match2+jrow-1] == 0) { + call tbrcsc (TJ_TAB(tj2), TJ_TAB(tjo), TJ_DCOL(tj2,1), + TJ_DCOL(tjo,TJ_DNUM(tj1)+1), jrow, krow, + TJ_DNUM(tj2)) + krow = krow + 1 + } + } + } + + if (extra > 0) { + call mfree (match1, TY_INT) + call mfree (match2, TY_INT) + } + +end diff --git a/pkg/utilities/nttools/tjoin/freetol.x b/pkg/utilities/nttools/tjoin/freetol.x new file mode 100644 index 00000000..45857c73 --- /dev/null +++ b/pkg/utilities/nttools/tjoin/freetol.x @@ -0,0 +1,15 @@ +include "tjoin.h" + +# B.Simon 16-Apr-99 first code + +# FREE_TOL -- Free the structure containing tolerance values + +procedure free_tol (tol) + +pointer tol # i: Vector of tolerance values +#-- + +begin + call mfree (TOL_PTR(tol), TY_DOUBLE) + call mfree (tol, TY_INT) +end diff --git a/pkg/utilities/nttools/tjoin/isnumber.x b/pkg/utilities/nttools/tjoin/isnumber.x new file mode 100644 index 00000000..3efb0dba --- /dev/null +++ b/pkg/utilities/nttools/tjoin/isnumber.x @@ -0,0 +1,35 @@ +include <ctype.h> +include <lexnum.h> + +# B.Simon 16-Apr-99 first code + +# IS_NUMBER -- Test string to see if it represents a number + +bool procedure is_number (str) + +char str[ARB] # i: String to be tested +#-- +int ic, nc, type +int lexnum() + +begin + # Use lexnum to determine string type + + ic = 1 + type = lexnum (str, ic, nc) + + # Any non-white characters after the number + # indicate this is not a number + + ic = ic + nc + while (str[ic] != EOS) { + if (! IS_WHITE(str[ic])) + return (false) + + ic = ic + 1 + } + + # Test for numeric types and return result of test + + return (type == LEX_OCTAL || type == LEX_DECIMAL || type == LEX_REAL) +end diff --git a/pkg/utilities/nttools/tjoin/issame.x b/pkg/utilities/nttools/tjoin/issame.x new file mode 100644 index 00000000..89ea221d --- /dev/null +++ b/pkg/utilities/nttools/tjoin/issame.x @@ -0,0 +1,127 @@ +include "tjoin.h" + +# B.Simon 16-Apr-99 first code + +# IS_SAME -- See if two values in different tables are the same + +bool procedure is_same (tj1, tj2, irow, jrow, tol, casesens) + +pointer tj1 # i: Table info descriptor for first input table +pointer tj2 # i: Table info descriptor for second input table +int irow # i: Row number of element in first table +int jrow # i: Row number of element in second table +pointer tol # i: Descriptor of vecor of tolerance values +bool casesens # i: Join is case sensitive +#-- +bool same +double dval1, dval2 +int icol, dtype1, dtype2, ival1, ival2 +pointer sp, str1, str2 + +string badtol "Tolerance must be zero for joins on non-numeric columns" +string badtype "Type mismatch on join columns" + +bool streq() +int spp_type() + +begin + # Allocate memory for table strings + + call smark (sp) + call salloc (str1, SZ_LINE, TY_CHAR) + call salloc (str2, SZ_LINE, TY_CHAR) + + same = true + do icol = 1, TJ_JNUM(tj1) { + if (! same) + break + + # Get column data types + + dtype1 = spp_type (TJ_JCOL(tj1,icol)) + dtype2 = spp_type (TJ_JCOL(tj2,icol)) + + # Comparison depends on data type + + if (dtype1 == TY_CHAR && dtype2 == TY_CHAR) { + # Nonzero tolerance illegal on string columns + + if (TOL_VAL(tol,icol) != 0.0) + call error (1, badtol) + + call tbegtt (TJ_TAB(tj1), TJ_JCOL(tj1,icol), irow, + Memc[str1], SZ_LINE) + call tbegtt (TJ_TAB(tj2), TJ_JCOL(tj2,icol), jrow, + Memc[str2], SZ_LINE) + + # Convert to lower case for case insensitive match + + if (! casesens) { + call strlwr (Memc[str1]) + call strlwr (Memc[str2]) + } + + # Test for undefined values first, which never match + + if (Memc[str1] == EOS || Memc[str2] == EOS) { + same = false + } else { + same = streq (Memc[str1], Memc[str2]) + } + + } else if (dtype1 == TY_BOOL && dtype2 == TY_BOOL) { + # Nonzero tolerance illegal on boolean column + + if (TOL_VAL(tol,icol) != 0.0) + call error (1, badtol) + + # Read boolean as integer so we can detect undefined values + + call tbegti (TJ_TAB(tj1), TJ_JCOL(tj1,icol), irow, ival1) + call tbegti (TJ_TAB(tj2), TJ_JCOL(tj2,icol), jrow, ival2) + + # Undefined values never match anything + + if (IS_INDEFI(ival1) || IS_INDEFI(ival2)) { + same = false + } else { + same = ival1 == ival2 + } + + } else if (dtype1 == TY_CHAR || dtype1 == TY_BOOL || + dtype2 == TY_BOOL || dtype2 == TY_BOOL) { + + # Catch comparison of numeric and non-numeric values + + call error (1, badtype) + + } else { + # Null column pointer indicates the join is done on row number + + if (TJ_JCOL(tj1,icol) == NULL) { + dval1 = irow + } else { + call tbegtd (TJ_TAB(tj1), TJ_JCOL(tj1,icol), irow, dval1) + } + + if (TJ_JCOL(tj2,icol) == NULL) { + dval2 = jrow + } else { + call tbegtd (TJ_TAB(tj2), TJ_JCOL(tj2,icol), jrow, dval2) + } + + # Undefined values never match + # Numeric values must be checked to see if the + # difference is smaller than the tolerance + + if (IS_INDEFD(dval1) || IS_INDEFD(dval2)) { + same = false + } else { + same = abs (dval2 - dval1) <= TOL_VAL(tol,icol) + } + } + } + + call sfree (sp) + return (same) +end diff --git a/pkg/utilities/nttools/tjoin/mkjoin.x b/pkg/utilities/nttools/tjoin/mkjoin.x new file mode 100644 index 00000000..46667b3e --- /dev/null +++ b/pkg/utilities/nttools/tjoin/mkjoin.x @@ -0,0 +1,106 @@ +include <tbset.h> + +# MKJOIN -- Create a table that will hold the join of two other tables +# +# B.Simon 04-Nov-87 First Code +# B.Simon 31-Mar-92 Set output table type from input tables +# B.Simon 14-Apr-99 Extracted code that creates table + +pointer procedure mkjoin (tol, tp1, cp1, tp2, cp2, outtable, otp, + cpvec1, cpvec2, cpveco, ncol1, ncol2) + +double tol # i: Tolerance used in testing for equality +pointer tp1 # i: Table descriptor of first table +pointer cp1 # i: Descriptor of merged column in first table +pointer tp2 # i: Table descriptor of second table +pointer cp2 # i: Descriptor of merged column in second table +char outtable[ARB] # i: Name of output table +pointer otp # i: Table descriptor of output table +pointer cpvec1[ARB] # i: Vector of columns in first input table +pointer cpvec2[ARB] # i: Vector of columns in second input table +pointer cpveco[ARB] # i: Vector of columns in output table +int ncol1 # i: Number of columns in first input table +int ncol2 # u: Number of columns in second input table +#-- +int icol, jcol, numcol, type1, type2 +int colnum[1], datatype[1], lendata[1], lenfmt[1] +pointer sp, icp, ocp, oldcol, newcol +pointer colname, colunits, colfmt + +int tbpsta(), tbcnum(), tbcigi() +pointer tbtopn() + +begin + # Set up arrays in dynamic memory + + call smark (sp) + call salloc (colname, SZ_COLNAME, TY_CHAR) + call salloc (colunits, SZ_COLUNITS, TY_CHAR) + call salloc (colfmt, SZ_COLFMT, TY_CHAR) + + # Copy column pointers to old column array. If the tolerance is + # zero, the join column in the second table is not copied + + numcol = ncol1 + ncol2 + + do icol = 1, ncol1 + cpvec1[icol] = tbcnum (tp1, icol) + + do icol = 1, ncol2 + cpvec2[icol] = tbcnum (tp2, icol) + + if (tol == 0.0 && cp1 != NULL && cp2 != NULL) { + jcol = tbcigi (cp2, TBL_COL_NUMBER) + ncol2 = ncol2 - 1 + numcol = numcol - 1 + do icol = jcol+1, ncol2 + cpvec2[icol-1] = cpvec2[icol] + } + + # Set type of output table + + otp = tbtopn (outtable, NEW_FILE, NULL) + + type1 = tbpsta (tp1, TBL_WHTYPE) + type2 = tbpsta (tp2, TBL_WHTYPE) + if (type1 == type2) + call tbpset (otp, TBL_WHTYPE, type1) + + # Copy column information from the input tables to the output table + + do icol = 1, ncol1 { + icp = cpvec1[icol] + call tbcinf (icp, colnum, Memc[colname], Memc[colunits], + Memc[colfmt], datatype[1], lendata[1], lenfmt[1]) + + call newcolnam (numcol, Memi[oldcol], icol, + Memc[colname], SZ_COLNAME) + + call tbcdef (otp, ocp, Memc[colname], Memc[colunits], Memc[colfmt], + datatype[1], lendata[1], 1) + cpveco[icol] = ocp + } + + do icol = 1, ncol2 { + icp = cpvec2[icol] + call tbcinf (icp, colnum, Memc[colname], Memc[colunits], + Memc[colfmt], datatype[1], lendata[1], lenfmt[1]) + call newcolnam (numcol, Memi[oldcol], icol, + Memc[colname], SZ_COLNAME) + call tbcdef (otp, ocp, Memc[colname], Memc[colunits], Memc[colfmt], + datatype[1], lendata[1], 1) + cpveco[ncol1+icol] = ocp + } + + # Copy the table columns a row at a time + + call tbtcre (otp) + call tbhcal (tp2, otp) + call tbhcal (tp1, otp) + + call mfree (oldcol, TY_INT) + call mfree (newcol, TY_INT) + call sfree (sp) + + return (otp) +end diff --git a/pkg/utilities/nttools/tjoin/mkpkg b/pkg/utilities/nttools/tjoin/mkpkg new file mode 100644 index 00000000..e421c190 --- /dev/null +++ b/pkg/utilities/nttools/tjoin/mkpkg @@ -0,0 +1,23 @@ +# Update the tjoin application code in the ttools package library +# Author: B.Simon, 25-NOV-1987 + +$checkout libpkg.a ../ +$update libpkg.a +$checkin libpkg.a ../ +$exit + +libpkg.a: + closeiotab.x "tjoin.h" + dojoin.x <tbset.h> "tjoin.h" + freetol.x "tjoin.h" + isnumber.x <ctype.h> <lexnum.h> + issame.x "tjoin.h" + mkjoin.x <tbset.h> + openitab.x <tbset.h> "tjoin.h" + openotab.x <tbset.h> "tjoin.h" + readtol.x "tjoin.h" + removejcol.x "tjoin.h" + renamecol.x <ctype.h> <tbset.h> "tjoin.h" + spptype.x <tbset.h> + tjoin.x <tbset.h> "tjoin.h" + ; diff --git a/pkg/utilities/nttools/tjoin/openitab.x b/pkg/utilities/nttools/tjoin/openitab.x new file mode 100644 index 00000000..ebdbcf97 --- /dev/null +++ b/pkg/utilities/nttools/tjoin/openitab.x @@ -0,0 +1,82 @@ +include <tbset.h> +include "tjoin.h" + +# B.Simon 16-Apr-99 first code + +# OPEN_ITAB -- Open one of the input tables used in the join + +pointer procedure open_itab (intable, column) + +char intable[ARB] # i: Input table name +char column[ARB] # i: List of join columns +#-- +int ic, icol +pointer tj, sp, cname, errtxt + +string nojoincol "No column supplied as join column" +string badcolnam "Column name not found in table (%s[c:%s])" +string notopen "Could not open table (%s)" + +bool strne() +int tbpsta(), tbcnum(), word_count(), word_fetch() +pointer tbtopn() + +begin + # Allocate memory for temporary strings + + call smark (sp) + call salloc (cname, SZ_COLNAME, TY_CHAR) + call salloc (errtxt, SZ_LINE, TY_CHAR) + + # Allocate memory for data structure + + call calloc (tj, LEN_TJSTRUCT, TY_INT) + + # Open table and put descriptor in structure + + iferr { + TJ_TAB(tj) = tbtopn (intable, READ_ONLY, NULL) + } then { + call sprintf (Memc[errtxt], SZ_LINE, notopen) + call pargstr (intable) + call error (1, Memc[errtxt]) + } + + # Create array of data columns + + TJ_DNUM(tj) = tbpsta (TJ_TAB(tj), TBL_NCOLS) + call malloc (TJ_DPTR(tj), TJ_DNUM(tj), TY_INT) + + do icol = 1, TJ_DNUM(tj) + TJ_DCOL(tj,icol) = tbcnum (TJ_TAB(tj), icol) + + # Create array of join columns + + TJ_JNUM(tj) = word_count (column) + if (TJ_JNUM(tj) == 0) + call error (1, nojoincol) + + call malloc (TJ_JPTR(tj), TJ_JNUM(tj), TY_INT) + + ic = 1 + icol = 1 + while (word_fetch (column, ic, Memc[cname], SZ_COLNAME) > 0) { + call tbcfnd (TJ_TAB(tj), Memc[cname], TJ_JCOL(tj,icol), 1) + + if (TJ_JCOL(tj,icol) == NULL) { + if (strne (Memc[cname], ROWNAME)) { + call sprintf (Memc[errtxt], SZ_LINE, badcolnam) + call pargstr (intable) + call pargstr (Memc[cname]) + call error (1, Memc[errtxt]) + } + } + + icol = icol + 1 + } + + # Free temporary memory and return descriptor of new structure + + call sfree (sp) + return (tj) +end diff --git a/pkg/utilities/nttools/tjoin/openotab.x b/pkg/utilities/nttools/tjoin/openotab.x new file mode 100644 index 00000000..bd84ca6a --- /dev/null +++ b/pkg/utilities/nttools/tjoin/openotab.x @@ -0,0 +1,91 @@ +include <tbset.h> +include "tjoin.h" + +# B.Simon 16-Apr-99 first code + +# OPEN_OTAB -- Open the output table + +pointer procedure open_otab (outtable, tj1, tj2) + +char outtable[ARB] # i: Output table name +pointer tj1 # i: First input table descriptor +pointer tj2 # i: Second input table descriptor +#-- +int type1, type2, icol, jcol, itab, tji[2] +int colnum, datatype, lendata, lenfmt +pointer tjo, sp, colname, colunits, colfmt, errtxt + +string notopen "Could not open table (%s)" + +int tbpsta() +pointer tbtopn() + +begin + # Allocate memory for temporary strings + + call smark (sp) + call salloc (colname, SZ_COLNAME, TY_CHAR) + call salloc (colunits, SZ_COLUNITS, TY_CHAR) + call salloc (colfmt, SZ_COLFMT, TY_CHAR) + call salloc (errtxt, SZ_LINE, TY_CHAR) + + # Allocate memory for data structure + + call malloc (tjo, LEN_TJSTRUCT, TY_INT) + + # Open table and put descriptor in structure + + iferr { + TJ_TAB(tjo) = tbtopn (outtable, NEW_FILE, NULL) + } then { + call sprintf (Memc[errtxt], SZ_LINE, notopen) + call pargstr (outtable) + call error (1, Memc[errtxt]) + } + + # Set table type based on input tables + + type1 = tbpsta (TJ_TAB(tj1), TBL_WHTYPE) + type2 = tbpsta (TJ_TAB(tj2), TBL_WHTYPE) + if (type1 == type2) + call tbpset (TJ_TAB(tjo), TBL_WHTYPE, type1) + + # No join columns are used for output table + + TJ_JNUM(tjo) = 0 + TJ_JPTR(tjo) = NULL + + # Allocate array to hold output table data columns + + TJ_DNUM(tjo) = TJ_DNUM(tj1) + TJ_DNUM(tj2) + call malloc (TJ_DPTR(tjo), TJ_DNUM(tjo), TY_INT) + + # Copy column information from the input tables to the output table + + tji[1] = tj1 + tji[2] = tj2 + + jcol = 1 + do itab = 1, 2 { + do icol = 1, TJ_DNUM(tji[itab]) { + call tbcinf (TJ_DCOL(tji[itab],icol), colnum, Memc[colname], + Memc[colunits], Memc[colfmt], datatype, + lendata, lenfmt) + + call renamecol (tji, itab, icol, Memc[colname], SZ_COLNAME) + + call tbcdef (TJ_TAB(tjo), TJ_DCOL(tjo,jcol), Memc[colname], + Memc[colunits], Memc[colfmt], datatype, + lendata, 1) + + jcol = jcol + 1 + } + } + + call tbtcre (TJ_TAB(tjo)) + call tbhcal (TJ_TAB(tj1), TJ_TAB(tjo)) + call tbhcal (TJ_TAB(tj2), TJ_TAB(tjo)) + + call sfree (sp) + return (tjo) +end diff --git a/pkg/utilities/nttools/tjoin/readtol.x b/pkg/utilities/nttools/tjoin/readtol.x new file mode 100644 index 00000000..4b35f522 --- /dev/null +++ b/pkg/utilities/nttools/tjoin/readtol.x @@ -0,0 +1,55 @@ +include "tjoin.h" +define SZ_VALUE 30 + +# B.Simon 16-Apr-99 first code + +# READ_TOL -- Parse the string containing the vector of tolerance values + +pointer procedure read_tol (tolerance) + +char tolerance[ARB] # i: Comma separated string of tolerance values +#-- +int ic, jc, nc, ival +pointer sp, value, errtxt, tol + +string badvalue "Invalid value in tolerance (%s)" +string negvalue "Negative value in tolerance (%g)" + +bool is_number() +int word_count(), word_fetch(), ctod() + +begin + call smark (sp) + call salloc (value, SZ_VALUE, TY_CHAR) + call salloc (errtxt, SZ_LINE, TY_CHAR) + + call malloc (tol, LEN_TOLSTRUCT, TY_INT) + + TOL_NUM(tol) = word_count (tolerance) + call malloc (TOL_PTR(tol), TOL_NUM(tol), TY_DOUBLE) + + ic = 1 + ival = 1 + while (word_fetch (tolerance, ic, Memc[value], SZ_VALUE) > 0) { + if (! is_number (Memc[value])) { + call sprintf (Memc[errtxt], SZ_LINE, badvalue) + call pargstr (Memc[value]) + call error (1, Memc[errtxt]) + } + + jc = 1 + nc = ctod (Memc[value], jc, TOL_VAL(tol,ival)) + + if (TOL_VAL(tol,ival) < 0.0) { + call sprintf (Memc[errtxt], SZ_LINE, negvalue) + call pargd (TOL_VAL(tol,ival)) + call error (1, Memc[errtxt]) + } + + ival = ival + 1 + } + + call sfree (sp) + return (tol) +end + diff --git a/pkg/utilities/nttools/tjoin/removejcol.x b/pkg/utilities/nttools/tjoin/removejcol.x new file mode 100644 index 00000000..1578021b --- /dev/null +++ b/pkg/utilities/nttools/tjoin/removejcol.x @@ -0,0 +1,43 @@ +include "tjoin.h" + +# B.Simon 16-Apr-99 first code + +# REMOVE_JCOL -- Remove join columns from list of data columns + +procedure remove_jcol (tj, tol) + +pointer tj # i: Descriptor of table information +pointer tol # i: Vector of tolerances used in equality test +#-- +bool match +int icol, jcol, kcol + +begin + kcol = 0 + do icol = 1, TJ_DNUM(tj) { + # Determine if this column is a join column + # with strict equality testing + + match = false + do jcol = 1, TJ_JNUM(tj) { + if (TJ_DCOL(tj,icol) == TJ_JCOL(tj,jcol) && + TOL_VAL(tol,jcol) == 0.0) { + match = true + break + } + } + + # Don't copy these columns as they duplicate the values + # in the join column in the other table. Also don't copy + # if icol == kcol in order to save time + + if (! match) { + kcol = kcol + 1 + if (kcol < icol) + TJ_DCOL(tj,kcol) = TJ_DCOL(tj,icol) + } + } + + TJ_DNUM(tj) = kcol +end + diff --git a/pkg/utilities/nttools/tjoin/renamecol.x b/pkg/utilities/nttools/tjoin/renamecol.x new file mode 100644 index 00000000..03d87041 --- /dev/null +++ b/pkg/utilities/nttools/tjoin/renamecol.x @@ -0,0 +1,109 @@ +include <ctype.h> +include <tbset.h> +include "tjoin.h" + +# RENAMECOL -- Rename a column to make its name unique +# +# If the name of the column pointed to by that index is unique, it is output +# as the new name. If it is not unique, a suffix of the form "_i" is appended +# to the name, where i is a digit which (hopefully) makes the name unique. +# +# B.Simon 03-Nov-87 first code +# B.Simon 04-Sep-90 Replaced call to strncmp with streq +# B.Simon 16-Apr-99 Revised version to work with tjoin + +procedure renamecol (tji, jtab, jcol, colname, maxch) + +pointer tji[2] # i: Array of table info descriptors +int jtab # i: Index of table containing column +int jcol # i: Index of column within table +char colname # u: Column name +int maxch # i: Max length of column name +#-- +bool before +int olen, nmatch, nbefore, itab, icol +pointer sp, oldnam, tmpnam, errtxt + +string notuniq "Cannot create a unique column name (%s)" + +bool streq() +int strlen() + +begin + # Allocate dynamic memory for strings + + call smark (sp) + call salloc (oldnam, SZ_COLNAME, TY_CHAR) + call salloc (tmpnam, SZ_COLNAME, TY_CHAR) + call salloc (errtxt, SZ_LINE, TY_CHAR) + + # Copy name to temporrary variable + + call strcpy (colname, Memc[oldnam], SZ_COLNAME) + call strupr (Memc[oldnam]) + + # See if the name is unique, and if not, how many columns with + # the same name precede this one + + nmatch = 0 + nbefore = 0 + before = true + + do itab = 1, 2 { + do icol = 1, TJ_DNUM(tji[itab]) { + call tbcigt (TJ_DCOL(tji[itab],icol), TBL_COL_NAME, + Memc[tmpnam], SZ_COLNAME) + call strupr (Memc[tmpnam]) + + if (streq (Memc[tmpnam], Memc[oldnam])) { + nmatch = nmatch + 1 + + if (before) + nbefore = nbefore + 1 + } + + if (itab == jtab && icol == jcol) + before = false + } + } + + # If the name is not unique, add a suffix of the form "_i" + + if (nmatch > 1) { + + # Check for ridiculous values of maxch + + olen = min (maxch-2, strlen(Memc[oldnam])) + if (olen < 1) { + call sprintf (Memc[errtxt], SZ_LINE, notuniq) + call pargstr (Memc[oldnam]) + call error (1, Memc[errtxt]) + } + + # Add the suffix + + Memc[oldnam+olen] = '_' + Memc[oldnam+olen+1] = TO_DIGIT (nbefore) + Memc[oldnam+olen+2] = EOS + + # Make sure it is unique + + do itab = 1, 2 { + do icol = 1, TJ_DNUM(tji[itab]) { + call tbcigt (TJ_DCOL(tji[itab],icol), TBL_COL_NAME, + Memc[tmpnam], SZ_COLNAME) + + if (streq (Memc[oldnam], Memc[tmpnam])) { + call sprintf (Memc[errtxt], SZ_LINE, notuniq) + call pargstr (Memc[oldnam]) + call error (1, Memc[errtxt]) + } + } + } + } + + # Copy to the output string + + call strcpy (Memc[oldnam], colname, maxch) + call sfree (sp) +end diff --git a/pkg/utilities/nttools/tjoin/spptype.x b/pkg/utilities/nttools/tjoin/spptype.x new file mode 100644 index 00000000..e454c9ba --- /dev/null +++ b/pkg/utilities/nttools/tjoin/spptype.x @@ -0,0 +1,29 @@ +include <tbset.h> + +# B.Simon 16-Apr-99 first code + +# SPP_TYPE -- Retrieve the spp type of a table column + +int procedure spp_type (cp) + +pointer cp # i: Column pointer +#-- +int dtype +int tbcigi() + +begin + if (cp == NULL) { + # Null column pointer indicates row number + + dtype = TY_INT + + } else { + # Table data types store strings as negative values + + dtype = tbcigi (cp, TBL_COL_DATATYPE) + if (dtype < 0) + dtype = TY_CHAR + } + + return (dtype) +end diff --git a/pkg/utilities/nttools/tjoin/tjoin.h b/pkg/utilities/nttools/tjoin/tjoin.h new file mode 100644 index 00000000..2e7155b0 --- /dev/null +++ b/pkg/utilities/nttools/tjoin/tjoin.h @@ -0,0 +1,27 @@ +# TJOIN.H -- Constants and data structures used by tjoin + +define ROWNAME "row" # string that indicates row number + # as join column + +# Structure used to hold information about tables + +define LEN_TJSTRUCT 7 + +define TJ_TAB Memi[$1] # Table descriptor +define TJ_JNUM Memi[$1+1] # Number of join columns +define TJ_DNUM Memi[$1+2] # Number of data columns +define TJ_JPTR Memi[$1+3] # Pointer to array of join columns +define TJ_DPTR Memi[$1+4] # Pointer to array of data colomns + +define TJ_JCOL Memi[TJ_JPTR($1)+$2-1] +define TJ_DCOL Memi[TJ_DPTR($1)+$2-1] + +# Structure used to hold tolerance vector + +define LEN_TOLSTRUCT 2 + +define TOL_NUM Memi[$1] # Number of tolerance values +define TOL_PTR Memi[$1+1] # Pointer to array of tolerance values + +define TOL_VAL Memd[TOL_PTR($1)+$2-1] + diff --git a/pkg/utilities/nttools/tjoin/tjoin.x b/pkg/utilities/nttools/tjoin/tjoin.x new file mode 100644 index 00000000..39963721 --- /dev/null +++ b/pkg/utilities/nttools/tjoin/tjoin.x @@ -0,0 +1,124 @@ +include <tbset.h> +include "tjoin.h" + +define SYNTAX 1 +define BIG_TABLE 5000 + +# TJOIN -- Join two tables on the basis of equality in a common column +# +# B.Simon 03-Nov-1987 First Code +# Phil Hodge 08-Apr-1999 Call tbfpri. +# B.Simon 16-Apr-1999 Support outer join and multiple join columns +# Phil Hodge 21-Jun-2001 Realloc TOL_PTR before copying tolerance value + +procedure t_tjoin() + +pointer intable1 # Names of the first table to be joined +pointer intable2 # Names of the second table to be joined +pointer outtable # Name of output table +pointer column1 # Name of columns to join in first table +pointer column2 # Name of columns to join in second table +pointer extrarows # Include unmatched rows from which table? +pointer tolerance # Tolerance used in testing for equality +bool casesens # Case sensitivity flag +#-- +int phu_copied # set by tbfpri and ignored +int extra, ival +pointer sp, errtxt, tj1, tj2, tjo, tol + +string extraopt "|neither|first|both|" +string badextra "Illegal value for extrarows" +string badjnum "Number of join columns do not match" +string badtolnum "Number of tolereances and join columns do not match" +string badcolnam "Column name not found in table (%s)" + +bool clgetb() +int strdic() +pointer read_tol(), open_itab(), open_otab() + +begin + # Allocate stack memory for strings + + call smark (sp) + call salloc (intable1, SZ_FNAME, TY_CHAR) + call salloc (column1, SZ_COLNAME, TY_CHAR) + call salloc (intable2, SZ_FNAME, TY_CHAR) + call salloc (column2, SZ_COLNAME, TY_CHAR) + call salloc (outtable, SZ_FNAME, TY_CHAR) + call salloc (extrarows, SZ_FNAME, TY_CHAR) + call salloc (tolerance, SZ_FNAME, TY_CHAR) + call salloc (errtxt, SZ_LINE, TY_CHAR) + + # Read the task parameters + + call clgstr ("intable1", Memc[intable1], SZ_FNAME) + call clgstr ("intable2", Memc[intable2], SZ_FNAME) + call clgstr ("outtable", Memc[outtable], SZ_FNAME) + + call clgstr ("column1", Memc[column1], SZ_COLNAME) + call clgstr ("column2", Memc[column2], SZ_COLNAME) + + call clgstr ("extrarows", Memc[extrarows], SZ_FNAME) + call clgstr ("tolerance", Memc[tolerance], SZ_FNAME) + casesens = clgetb ("casesens") + + # Check value of extrarows + + extra = strdic (Memc[extrarows], Memc[extrarows], SZ_FNAME, extraopt) + + if (extra == 0) { + call sprintf (Memc[errtxt], SZ_LINE, badextra) + call pargstr (Memc[extrarows]) + call error (SYNTAX, Memc[errtxt]) + } + + extra = extra - 1 + + # Parse the string of tolerance values + + tol = read_tol (Memc[tolerance]) + + # Open the input tables and get the column pointers + + tj1 = open_itab (Memc[intable1], Memc[column1]) + tj2 = open_itab (Memc[intable2], Memc[column2]) + + # Check the number of join columns and tolerances for agreement + + if (TJ_JNUM(tj1) != TJ_JNUM(tj2)) + call error (1, badjnum) + + if (TJ_JNUM(tj1) != TOL_NUM(tol)) { + if (TOL_NUM(tol) == 1) { + TOL_NUM(tol) = TJ_JNUM(tj1) + call realloc (TOL_PTR(tol), TOL_NUM(tol), TY_DOUBLE) + do ival = 2, TJ_JNUM(tj1) + TOL_VAL(tol,ival) = TOL_VAL(tol,1) + } else { + call error (1, badtolnum) + } + } + + # Remove data columns from second table which are also + # join columns in the first table + + call remove_jcol (tj2, tol) + + # Create the output table + + call tbfpri (Memc[intable1], Memc[outtable], phu_copied) + tjo = open_otab (Memc[outtable], tj1, tj2) + + # Compute the join of the two tables + + call dojoin (tj1, tj2, tjo, tol, extra, casesens) + + # Close the tables and free dynamic memory + + call free_tol (tol) + + call close_iotab (tj1) + call close_iotab (tj2) + call close_iotab (tjo) + call sfree (sp) +end |