VOTPARSE -- VOTable parser API. This interface allows a VOTable to be read or created from a number of calling languages. XML parsing is done using a SAX parser on input; when creating new VOTables, the API is used to populate the VOTable structure before writing the final output. We attempt to implement all aspects of the VOTable v1.2 specification and remain compatible with earlier versions to the extent possible. Public Interface: ----------------- vot = vot_openVOTABLE (str|fname) vot_closeVOTABLE (vot) res = vot_getRESOURCE (vot|res) tab = vot_getTABLE (res) field = vot_getFIELD (tab) data = vot_getDATA (tab) tdata = vot_getTABLEDATA (data) // data elements tr = vot_getTR (tdata) td = vot_getTD (tr) bin = vot_getBINARY (data) fits = vot_getFITS (data) group = vot_getGROUP (vot|res|tab|group) fldref = vot_getFIELDRef (group) parref = vot_getPARAMRef (group) desc = vot_getDESCRIPTION (handle) param = vot_getPARAM (handle) info = vot_getINFO (handle) stream = vot_getSTREAM (bin|fits) val = vot_getVALUES (field|param|info) min = vot_getMIN (val) max = vot_getMAX (val) opt = vot_getOPTION (val) link = vot_getLINK (res|info|param|field|table) sys = vot_getCOOSYS (vot) // backwards compat. 
handle = vot_newNode (parent, type) vot_attachNode (parent, handle) vot_deleteNode (handle) Output Routines: vot_writeVOTable (vot, fd) vot_writeHTML (vot, fd) vot_writeDelimitedTable (vot, fd, delim) w Convenience Functions: str = vot_getTableCell (tdata, row, col) n = vot_getNRows (tdata) n = vot_getNCols (tdata) type = vot_getDATAType (data) str = vot_getAttr (handle, attr) stat = vot_setAttr (handle, attr, value) str = vot_getValue (handle) str = vot_setValue (handle, value) len = vot_getLength (handle) len = vot_getNumberOf (handle, type) handle = vot_findByAttr (parent, type, attr, value) handle[] = vot_findInGroup (group, TY_FIELD|TY_PARAM) Low-level Interface (Private?): handle = vot_getNext (handle) handle = vot_getSibling (handle) handle = vot_getChild (handle) handle = vot_getChildOfType (handle, type) handle = vot_getParent (handle) type = vot_valueOf (handle) ------------------------------------------------------------------------------ Data Structures --------------- #define TY_VOTABLE 1 #define TY_RESOURCE 2 #define TY_FIELD 3 #define TY_PARAM 4 #define TY_INFO 5 #define TY_ROW 6 #define TY_DATA 7 #define TY_VALUES 8 #define TY_STREAM 9 #define TY_LINK 10 #define TY_FIELDREF 11 #define TY_PARAMREF 12 #define TY_OPTION 13 #define TY_MIN 14 #define TY_MAX 15 #define TY_FITS 16 #define TY_TABLE 17 #define TY_GROUP 18 #define TY_COOSYS 19 #define MAX_ATTRS 11 #define SZ_ATTR_NAME 16 #define SZ_ATTR_VALUE 64 typedef unsigned int handle_t; typedef struct { char name[SZ_ATTR_NAME]; /* attribute name */ char value[SZ_ATTR_VALUE]; /* attribute value */ } Attr, *AttrP; typedef struct { char type; /* node type */ void *next; /* sibling node */ void *child; /* child nodes */ void *child_last; /* last child node */ void *parent; /* parent node */ char *data; /* value string */ Attr attr[MAX_ATTRS]; /* attribute array */ int nattrs; /* number of attributes */ } Node, *NodeP; - Need to keep track of last child so we can easily append new tags - The 'data' string is 
any text in the xml element, e.g. given <tag>yada yada yada</tag> the 'data' string is then 'yada yada yada' - structs should be calloc'd to initialize to NULL ------------------------------------------------------------------------------ Notes: ------ - The 'handle' is an opaque reference to the node that is meant to be language-neutral. In reality it is just the pointer to the Node struct cast as an integer. Note that on 64-bit platforms, bindings will require the handle to be typed as a 'long' value. - The vot_openVOTable() is used both for reading and writing VOTables. To read, the argument may be a string which is either a filename to be read or a literal string assumed to be the VOTable document itself. If passed a NULL pointer, a new document structure will be created and we assume we'll write it out later. - The various vot_get() functions in reality only return the single handle of the specified type, or a NULL if not found. This node is required to be a child of the argument (parent) node. vot_getFIELD (handle_t parent) { Node *ip = (handle_t) NULL;: Node *p = (Node *) parent; Node *c = p->child; int ptype = p->type; if (ptype != TY_TABLE) // check for valid parent fprintf (stderr, "Invalid node for FIELDs\n"); for (ip=c; ip; ip = ip->next) if (ip->type == TY_FIELD) break; return ((handle_t) ip); } - A call to vot_getNext() returns the next sibling of the same type as the arg. In contrast, vot_getSibling() returns the next sibling node regardless of type. vot_getNext (handle_t tag) { Node *ip: Node *np = (Node *) tag; int type = p->type; for (ip=np->next; ip; ip = ip->next) if (ip->type == np->type) return ((handle_t) ip); return ((handle_t) NULL); } vot_getSibling (handle_t tag) { Node *np = (Node *) tag; return ((handle_t) np->next); } - vot_newNode() creates an empty node structure, other calls are used to populate the attributes, values, or child nodes. OTOH, methods like vot_attachNode() / vot_deleteNode() use handles that can refer to sub-trees. 
This makes it easy to refer to e.g. an entire RESOURCE and then attach it to a new document or delete it entirely. The methods take care of adjusting the child/next pointers. vot_newNode (handle_t parent, int type) { Node *np = (Node *) calloc (1, sizeof (Node)); np->type = type; return ((handle_t) np);; } vot_attachNode (handle_t parent, handle_t node) { Node *p = (Node *) parent, *n = (Node *) node, *last = p->child_last; if (p->child) last->next = n; // append existing children else p->child = n; // make an only child p->child_last = n // update parent } vot_deleteNode (handle_t node) { Node *n = (Node *) node, *p = n->parent, *last = p->child_last, *prev = (Node *) NULL; if (p->child == n) { // node is first child p->child = n->next; } else { // Find the previous sibling node for (prev=p->child; prev->next != n; prev=prev->next) ; prev->next = n->next; if (p->child_last == n) //update parent p->child_last = prev; } vot_freeNode (n); // free the node sub-tree } The vot_freeNode() method would need to walk the tree in a depth-first manner to free the Node structures. The vot_closeVOTable() then is simply a call to this on the root node. - Fortran bindings should use the subroutine model to avoid having to declare numerous functions. E.g. The C method field = vot_getFIELD (tab) is used in Fortran as call getFIELD (tab, field) Implementation of the wrapper is then simply void getFIELD (handle_t parent, handle_t *field) { *field = vot_getFIELD (parent); } Methods that pass strings need to take into account the hidden length parameter used in Fortran. E.g. The C method valueStr = vot_getAttr (handle, attr) is used in Fortran as call getAttr (handle, attr, valueStr) However, the implementation is void getAttr (handle_t parent, char *a, char *v, int alen, int vlen) { *v = vot_getAttr (parent, a); } The 'alen' and 'vlen' args are added by the Fortran compiler to pass in the length of the string. 
In some cases we may also want to pass in the max length of a result string so we don't overflow a buffer, or else return the length of the string we found. Since Fortran is call-by-address in either of these cases the length becomes a pointer declaration. ------------------------------------------------------------------------------ PseudoCode: ----------- 1) Read a VOTable, processing each RESOURCE in the file. Note we don't handle *nested* RESOURCEs here. vot = vot_openVOTable (fname) // loop over RESOURCES res = vot_getRESOURCE (vot, i); printf ("Table has toplevel %d RESOURCE elements\n", vot_getLength (res)); while ( res ) { tab = vot_getTABLE (res) // Print column info for (field=vot_getFIELD(tab); field; field=vot_getNext(field)) { strcpy (col[i].name, vot_getAttr (field, "name") strcpy (col[i].ucd, vot_getAttr (field, "ucd") } // Get the data element data = vot_getDATA (tab) switch (vot_getDataType (data)) case TY_TABLEDATA: // Get data stored as a TABLEDATA xml block tdata = vot_getTABLEDATA (data) if (use_direct) { // Get the table data cells by direct index tr = vot_getTR(tdata) nrows = vot_getLength (tr) ncols = vot_getLength (vot_getTD(tr)) for (l=0; l < nrows ; l++) { for (m=0; m < ncols; m++) { str = vot_getTableCell (tdata, l, m) } else { // Get the table data cells by looping over rows/cols for (tr=vot_getTR (tdata); tr; tr=vot_getNext(tr)) for (td=vot_getTD(tr); td; td=vot_getNext(td)) str = vot_getValue (td); } break case TY_BINARY: // Get data stored as inline binary. If the encoding of // the stream is base64 read the sequence of bytes and decode. bin = vot_getBINARY (data) stream = vot_getSTREAM (bin) if (strcmp ("base64", vot_getAttr(stream, "encoding") == 0) str = vot_getValue (stream) break case TY_FITS: // Read FITS data. Assumes a particular extension of an // MEF is available for download at the given href. fits = vot_getFITS (data) extnum = vot_getAttr (fits, "extnum") // get extension no. 
stream = vot_getSTREAM (fits) href = vot_getAttr (stream, "href") ....download the FITS file .... break default: error (0, "Invalid table DATA type.") } res = vot_getNext(res) // get next resource } vot_closeVOTable (vot) 2) Print all the PARAM elements in a table with a single RESOURCE a) Use the low-level interface dealing with document structure res = vot_getRESOURCE (vot) for (p = vot_getChild (res); p; p = vot_getSibling (p)) { if (vot_typeOf (p) == TY_PARAM) printf ("PARAM name=%s value=%s\n", vot_getAttr(p, "name"), vot_getAttr(p, "value")) } b) Use the common hi-level interface res = vot_getRESOURCE (vot) for (p = vot_getPARAM (res); p; p = vot_getNext (p)) { printf ("PARAM name=%s value=%s\n", vot_getAttr(p, "name"), vot_getAttr(p, "value")) } 3) Check a VOTable to see if it is an error return a) SCS-only (Preferred) Method - INFO as a child of VOTABLE vot = vot_openVOTable (fname) if ((info = vot_getINFO (vot)) { if (strcsecmp (vot_getAttr (info, "name"), "error") == 0) return ((errMsg = vot_getAttr (info, "value"))) } else return ("file is okay") b) Alternate Method - PARAM as a child of RESOURCE "allowed" for SCS, required for other DAL services. For SCS, the name/id are different; later DAL services use 'QUERY_STATUS'. vot = vot_openVOTable (fname) res = vot_getRESOURCE (vot) param = vot_getPARAM (res) info = vot_getINFO (res) if (strcasecmp(vot_getAttr (param, "name"), "error") == 0) { // SCS alternate method where the PARAM value defines the error string return (vot_getAttr (param, "value")) } else if (strcasecmp(vot_getAttr(info,"name"),"QUERY_STATUS") == 0) { // All other DAL methods where an INFO of the RESOURCE defines // a QUERY_STATUS of the result. if ( ((val = vot_getAttr (info, "value")), "OK") == 0) return (NULL); // no error else return (vot_getValue (info)) // return error message } NOTES: We should make this an interface convenience. 
A real error will be a minimal VOTable error return we can parse, but DAL2 services like TAP may experience overflow where we don't see an error result until a max-records/timeout is reached and the error INFO is at the end of the output. In this last case the error INFO isn't seen until after the table data. To be real, we need to then search all INFO children of the RESOURCE rather than just the first one shown in the above. 4) Create a new VOTable from computed values vot = vot_openVOTable (NULL) // initialize res = vot_newNode (vot, TY_RESOURCE) // create empty resource vot_setAttr (res, "id", "newTable") // set table name desc = vot_newNode (vot, TY_DESCRIPTION) // set description vot_setValue (desc, "This is a test description") tab = vot_newNode (res, TY_TABLE) // create a TABLE for (i=0; i < 10; i++) { f = vot_newNode (tab, TY_FIELD) sprintf (colname, "col%d", i) vot_setAttr (f, "name", colname) vot_setAttr (f, "id", colname) : : } data = vot_newNode (tab, TY_DATA) // create a DATA tdata = vot_newNode (data, TY_TABLEDATA) // create a TABLEDATA for (i=0; i < nrows; i++) { tr = vot_newNode (tdata, TY_TR) // create a row for (j=0; j < ncols; j++) { td = vot_newNode (tr, TY_TD) // create a col vot_setValue (td, (char *)data[i,j]) } } info = vot_newNode (tab, TY_INFO) // create an INFO vot_setAttr (info, "id", "STATUS") vot_setAttr (info, "value", "OK") vot_writeVOTable (vot, stdout) // write it out 4-A) Create a new VOTable from computed values (Alternate Method) vot = vot_openVOTable (NULL) // initialize res = vot_newRESOURCE (vot) // create empty resource vot_setAttr (res, "id", "newTable") // set table name // set description desc = vot_newDESCRIPTION (vot, "This is a test description") tab = vot_newTABLE (res) // create a TABLE for (i=0; i < 10; i++) { f = vot_newFIELD (tab) sprintf (colname, "col%d", i) vot_setAttr (f, "name", colname) vot_setAttr (f, "id", colname) : : } data = vot_newDATA (tab) // create a DATA tdata = vot_newTABLEDATA (data) // create a 
TABLEDATA for (i=0; i < nrows; i++) { tr = vot_newTR (tdata) // create a row for (j=0; j < ncols; j++) { td = vot_newTD (tr) // create a col vot_setValue (td, (char *)data[i,j]) } } info = vot_newINFO (tab) // create an INFO vot_setAttr (info, "id", "STATUS") vot_setAttr (info, "value", "OK") vot_writeVOTable (vot, stdout) // write it out 5) Concatenate the RESOURCEs from two input tables to a new output table vot1 = vot_openVOTABLE ("file1.xml") // open input tables vot2 = vot_openVOTABLE ("file2.xml") res1 = vot_getRESOURCE (vot1) // get resources res2 = vot_getRESOURCE (vot2) vot3 = vot_openVOTABLE (NULL) // open output table vot_attachNode (vot3, vot1) // add resources vot_attachNode (vot3, vot2) vot_writeVOTable (vot2, stdout) // write it out vot_closeVOTABLE (vot1) // clean up vot_closeVOTABLE (vot2) vot_closeVOTABLE (vot3) 6) Extract all the values in the columns specified by a GROUP vot = vot_openVOTable (fname) res = vot_getRESOURCE (vot) tab = vot_getTABLE (res) // Look for the GROUP by name for (group=vot_getGROUP(res); group; group=vot_getNext(group)) if (strcasecmp (vot_getAttr(group,"name"), "CoolStuff") == 0) break; // Gather the column numbers for the FIELDs in the GROUP for (fref=vot_getFIELDRef(group); fref; fref=vot_getNext(fref)) { // Get the field references ref = vot_getAttr (fref, "ref") // Find the FIELD with the referenced ID attribute, field = vot_findByAttr (tab, TY_FIELD, "ID", ref) // Convert to a column number to extract fp = vot_getField (tab) // start FIELD colnum = -1 while (fp && fp != field) { colnum++; // 0-indexed list fp = vot_getNext (fp) } cols[numInGroup++] = colnum } // Print out the data in selected columns data = vot_getDATA (tab) tdata = vot_getTABLEDATA (data) tr = vot_getTR(tdata) nrows = vot_getLength (tr) for (i=0; i < nrows ; i++) { for (j=0; j < numInGroup; j++) printf ("%s ", vot_getTableCell (tdata, i, cols[j]) printf ("\n") }