diff options
Diffstat (limited to 'vendor/voclient/libvotable/examples/votget.c')
-rw-r--r-- | vendor/voclient/libvotable/examples/votget.c | 801 |
1 files changed, 801 insertions, 0 deletions
diff --git a/vendor/voclient/libvotable/examples/votget.c b/vendor/voclient/libvotable/examples/votget.c new file mode 100644 index 00000000..15ddac32 --- /dev/null +++ b/vendor/voclient/libvotable/examples/votget.c @@ -0,0 +1,801 @@ +/** + * VOTGET -- Download all access references in a VOTable. + * + * Usage: + * + * votget [-b <base>] [-c <col>] [-u <ucd>] [-v] [-x] [-o fname] <file> + * + * Where + * -b <base> Base output filename + * -e [<extn>] Extension to add to filename (or auto) + * -f <fmt> Download only specified <type> + * -h Print help summary + * -s Use sequential file numbers + * -t Input file is temporary, delete when done + * -u <ucd> Use ucd to identify acref column + * + * -o <fname> Output extracted filename (or 'stdout' or '-') + * -v Verbose output + * -x Extract access references + * + * -A <colnum> Col number to use as acref column (0-indexed) + * -B Background, i.e. run in forked child process + * -C Cache the downloaded file + * -D Set download directory + * -F <colnum> Col number to use as format column (0-indexed) + * -N <N> Number of simultaneous downloads + * + * +d debug output + * + * <file> Name of file to process, or '-' for stdin + */ + + +int votget (int argc, char **argv); + +/** + * Program Main. This is just a wrapper around the interface routine. + */ +int +main (int argc, char **argv) +{ + return votget (argc, argv); +} + + + +/************************************************************************ + * * + * VOTGET -- Download the access references in a VOTable. * + * * + ************************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <fcntl.h> +#include <sys/errno.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/stat.h> +#include <pthread.h> + +#include <curl/curl.h> +#include <curl/types.h> +#include <curl/easy.h> + +#include "votParse.h" + + +#define SZ_FNAME 256 /* max size of a file name */ +#define SZ_URL 4096 /* max URL size */ +#define MIN_THREADS 4 /* min no. simultaneous thread */ +#define MAX_THREADS 64 /* max no. simultaneous threads */ +#define MAX_DOWNLOADS 4096 /* max no. files to download */ +#define MAX_TRYS 3 /* max download attempts */ + +#define NAXIS_UCD "VOX:Image_Naxis" +#define NAXES_UCD "VOX:Image_Naxes" +#define SCALE_UCD "VOX:Image_Scale" +#define ACREF_UCD "VOX:Image_AccessReference" +#define FORMAT_UCD "VOX:Image_Format" + + +int vot = 0; /* VOTable handle */ +int verbose = 0; /* verbose parameter */ +int debug = 0; /* debug flag */ +int extract = 0; /* extract references only */ +int detach = 0; /* run as detached process */ +int nfiles = 0; /* number of download files */ +int ngot = 0; /* number of files downloaded */ +int seq = 0; /* use sequential file numbers */ +int isCache = 0; /* is this a cache file? */ +int isTemp = 0; /* is this a temp file? */ +int acol = -1; /* access reference column */ +int tcol = -1; /* image type column */ + +int nthreads = MIN_THREADS; /* number of download threads */ +int maxTrys = MAX_TRYS; /* download attempts */ + +char *base = "file"; /* output base filename */ +char *extn = NULL; /* output filename extension */ +char *dir = NULL; /* download directory */ +char *afname = NULL; /* output acref filename */ + +char *acref = NULL; /* acref url */ +char *acref_ucd = NULL; /* acref UCD */ +char *fmt = NULL; /* image format */ +char *fmt_ucd = NULL; /* image format UCD */ + +FILE *afd = (FILE *) NULL; /* acref file descriptor */ + +pthread_mutex_t counter_mut = PTHREAD_MUTEX_INITIALIZER; + + +typedef struct { + char url[SZ_URL]; /* access URL */ + char fname[SZ_URL]; /* local filename */ + int tnum; /* worker thread number */ +} Acref, *AcrefP; + +Acref aclist[MAX_DOWNLOADS]; /* access list */ + + +/* Private methods. + */ +static int vot_isVOTable (char *infile); +static int vot_acrefColumn (handle_t tab); +static int vot_typeColumn (handle_t tab); +static int vot_getURL (char *url, char *ofname); +static int vot_loadText (char *infile); +static int vot_loadVOTable (char *infile); + +static void vot_saveAcref (char *acref, int num); +static void *vot_getAclist (void *arg); +static void vot_printAclist (); +static void vot_Usage (); + +static unsigned int vot_sum32 (char *str); + + + +/** + * Program entry point. + */ +int +votget (int argc, char **argv) +{ + int i, stat = OK; + char *fname; + + + if (argc < 2) { + vot_Usage (); + return (1); + + } else if (argc >= 2) { + for (i=1; i < argc; i++) { + if (argv[i][0] == '-' && strlen (argv[i]) > 1) { + switch (argv[i][1]) { + + case 'h': vot_Usage (); return (0); + + case 'b': base = argv[++i]; break; + case 'f': fmt = argv[++i]; break; + case 's': seq++; break; + case 't': isTemp++; break; + case 'u': acref_ucd = argv[++i]; break; + + case 'o': afname = argv[++i]; break; + case 'v': verbose++; break; + case 'x': extract++; break; + + case 'A': acol = atoi(argv[++i]); break; + case 'B': detach++; break; + case 'C': isCache++; break; + case 'D': dir = argv[++i]; break; + case 'F': tcol = atoi(argv[++i]); break; + case 'N': nthreads = atoi(argv[++i]); break; + + default: + fprintf (stderr, "Invalid argument '%c'\n", argv[i][1]); + return (1); + } + } else if (argv[i][0] == '+' && strlen (argv[i]) > 1) { + switch (argv[i][1]) { + case 'd': debug++; break; + } + } else + fname = argv[i]; + } + } + + + /* Setup defaults and initialize. + */ + memset (&aclist[0], 0, MAX_DOWNLOADS); + if (!fmt_ucd) + fmt_ucd = FORMAT_UCD; + if (!acref_ucd) + acref_ucd = ACREF_UCD; + + if (afname && (afd = fopen (afname, "w+")) == (FILE *) NULL) { + if (verbose) + fprintf (stderr, "Error: cannot open output file '%s'\n", afname); + return (ERR); + } + + + /* Determine the type of input file. + */ + switch ((vot = vot_isVOTable (fname))) { + case -1: stat = ERR; goto done_; + case 0: vot_loadText (fname); break; + case 1: vot_loadVOTable (fname); break; + } + + + /* If all we're doing is extracting the URLs we can quit now. + */ + if (extract) + goto done_; + + if (debug) { + fprintf (stderr, "acol = %d tcol = %d\n", acol, tcol); + fprintf (stderr, "Downloading %d files ....\n", nfiles); + vot_printAclist (); + } + + + /* If we've been asked to detach, fork off to do the downloads in + * a child, and return to the caller. + */ + if (detach) { + pid_t pid; + + switch ((pid = fork ())) { + case -1: return (ERR); /* We are an error */ + case 0: break; /* We are the child */ + default: return (OK); /* We are the parent */ + } + } + + + /* Initialize the download directory. + */ + if (dir) { + if (access (dir, F_OK) < 0) + mkdir (dir, 0644); + if (access (dir, W_OK) < 0) { + if (verbose) + fprintf (stderr, "Error: Cannot write to directory '%s'\n", dir); + return (1); + } + chdir (dir); + } + + /* Do the downloads. + */ + if (nfiles < MIN_THREADS) + nthreads = nfiles; + + if (nthreads == 1) { + vot_getAclist (NULL); + + } else { + /* Spawn the worker threads. + */ + int rc = 0, tc = 0, status = 0, tnum[MAX_THREADS]; + pthread_attr_t attr; /* thread attributes */ + pthread_t thread[MAX_THREADS]; + + + /* Initialize the service processing thread attributes and run 'em. + */ + pthread_attr_init (&attr); + pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_JOINABLE); + + if (verbose) + fprintf (stderr, "Starting download ....\r"); + + for (tc=0; tc < nthreads; tc++) { + tnum[tc] = tc; + if ((rc = pthread_create (&thread[tc], &attr, vot_getAclist, + (void *)&tnum[tc]))) { + fprintf (stderr, "ERROR: pthread_create() fails, code=%d\n", + rc); + exit (-1); + } + } + + /* Free attribute and wait for the threads to complete. + */ + pthread_attr_destroy (&attr); + for (tc=0; tc < nthreads; tc++) { + if ((rc = pthread_join (thread[tc], (void **)&status)) ) { + if (rc != ESRCH) { + fprintf (stderr, + "ERROR: pthread_join() fails, code=%d status=%d\n", + rc, status); + exit (-1); + } + } + } + + if (verbose) { + fprintf (stderr, "Downloaded %d files -- Download complete\n", + nfiles); + fflush (stderr); + } + } + + + /* Remove input file if it is temporary. + */ + if (isTemp) + unlink (fname); + + /* Close the table and clean up. + */ +done_: + + return (OK); +} + + +/********************************************************************** +** Private Procedures +**********************************************************************/ + + +/** + * VOT_USAGE -- Print the task usage and exit. + */ +static void +vot_Usage () +{ +fprintf (stderr, + "\n" + " Usage:\n" + "\n" + " votget [-b <base>] [-c <col>] [-u <ucd>] [-v] [-x] [-o fname] <file>\n" + "\n" + " Where\n" + " -b <base> Base output filename\n" + " -e [<extn>] Extension to add to filename (or auto)\n" + " -f <fmt> Download only specified <type>\n" + " -h Print help summary\n" + " -s Use sequential file numbers\n" + " -t Input file is temporary, delete when done\n" + " -u <ucd> Use ucd to identify acref column\n" + "\n" + " -o <fname> Output extracted filename (or 'stdout' or\n" + " '-')\n" + " -v Verbose output\n" + " -x Extract access references\n" + "\n" + " -A <colnum> Col number to use as acref column (0-indexed)\n" + " -B Background, i.e. run in forked child process\n" + " -C Cache the downloaded file\n" + " -D Set download directory\n" + " -F <colnum> Col number to use as format column\n" + " (0-indexed)\n" + " -N <N> Number of simultaneous downloads\n" + "\n" + " +d debug output\n" + "\n" + " <file> Name of file to process, or '-' for stdin\n" + ); +} + + + +/** + * VOT_LOADTEXT -- Load the access list from a text file. We assume the + * list is simply one url per line. + */ +static int +vot_loadText (char *infile) +{ + int i = 0, fd, nread = 0, tnum = 0, sz = 0; + char *acref, *buf, *ip; + struct stat info; + + if ((fd = open (infile, O_RDONLY)) < 0) + return (-1); + + if (stat (infile, &info) < 0) /* read the whole file */ + return (-1); + sz = info.st_size; + buf = calloc (sz + 1, sizeof(char)); + nread = read (fd, buf, sz); + close (fd); + + acref = buf; /* point to 1st url */ + for (i=0; *acref; i++) { + for (ip=acref; *ip && *ip != '\n'; ip++) + ; + *ip = '\0'; + + vot_saveAcref (acref, i); + + acref = ip + 1; + nfiles++; + tnum++; + } + + if (afd) /* close the acref file */ + fclose (afd); + + return (nfiles); +} + + +/** + * VOT_LOADVOTABLE -- Load the access list from a VOTable. + */ +static int +vot_loadVOTable (char *infile) +{ + int i, tnum = 0; + int res, tab, data, tdata, tr; + char *acref; + + + /* Open the table. This also parses it. + */ + if ( (vot = vot_openVOTABLE (infile) ) <= 0) { + if (verbose) + fprintf (stderr, "Error opening VOTable '%s'\n", infile); + return (ERR); + } + + /* Loop over all the resources in the file. In most cases there will + * only be one <RESOURCE>, if not then the selection applies to all + * valid tables. + */ + for (res = vot_getRESOURCE (vot); res; res = vot_getNext (res)) { + + /* Get the <TABLE> element. + */ + if (! (tab = vot_getTABLE (res))) { + if (verbose) fprintf (stderr, "Error: No <TABLE> in <RESOURCE>\n"); + continue; + } + data = vot_getDATA (tab); + tdata = vot_getTABLEDATA (data); + + /* Loop through the FIELDs to find the acref. Let the cmdline param + * override the acref column ucd. + */ + acol = (acol < 0 ? vot_acrefColumn (tab) : acol); + tcol = (tcol < 0 ? vot_typeColumn (tab) : tcol); + + /* Now scan the data table for acrefs. We got the acref column above + * so lookup the table cell directly for each row, either printing + * out the acref for a simple extract, or by adding to the access + * list to be processed below. + */ + i = 0; + for (tr=vot_getTR (tdata); tr; tr=vot_getNext(tr)) { + acref = vot_getTableCell (tdata, i, acol); + if (tcol >= 0) { + char *format = vot_getTableCell (tdata, i, tcol); + + if (format && fmt && strcasestr (format, fmt) == NULL) + continue; + } + + vot_saveAcref (acref, i); + + nfiles++; + tnum++; + i++; + } + } + + vot_closeVOTABLE (vot); + if (afd) /* close the acref file */ + fclose (afd); + + return (nfiles); +} + + +/** + * VOT_SAVEACREF -- Save the URL to the access list. + */ +static void +vot_saveAcref (char *acref, int num) +{ + if (afd) + fprintf (afd, "%s\n", acref); + else if (extract) + printf ("%s\n", acref); + else { + /* Save to the access list. + */ + aclist[num].tnum = ((nthreads == 1) ? 0 : (num % nthreads)); + strcpy (aclist[num].url, acref); + sprintf (aclist[num].fname, "%s%u", base, + (seq ? num : vot_sum32 (acref)) ); + } +} + + +/** + * VOT_ISVOTABLE -- Determine in the input file is a VOTable or URL @file. + * We return zero if the file cannot be parsed as a valid VOTable (i.e. + * we assume it is an @file of URLs), or else we return the root handle to + * the parsed file. + */ + +#define SZ_READ 2880 + +static int +vot_isVOTable (char *infile) +{ + FILE *fd = (FILE *) NULL; + char buf[SZ_READ]; + register int nread;; + + + /* read the first 1024 bytes and search for a 'votable' string... */ + if (access (infile, F_OK) < 0) { + if (verbose) + fprintf (stderr, "Error: Cannot open input file '%s'\n", infile); + return (-1); + + } else if ((fd = fopen (infile, "r"))) { + memset (buf, 0, SZ_READ); + nread = fread (buf, sizeof (char), SZ_READ, fd); + fclose (fd); + + return (strcasestr (buf, "votable") ? 1 : 0); + } + return ( 0 ); +} + + +/** + * VOT_ACREFCOLUMN -- Determine the access column for the given table. + */ +static int +vot_acrefColumn (handle_t tab) +{ + register int i = 0, acol = -1; + handle_t field; + char *ucd; + + + /* Loop through the FIELDs to find the acref. + */ + for (field=vot_getFIELD(tab); field; field=vot_getNext(field),i++) { + ucd = vot_getAttr (field, "ucd"); + if (ucd && strcasecmp (acref_ucd, ucd) == 0) { + acol = i; + break; + } + } + + if (acol < 0) { /* make sure we found a column */ + if (verbose) + fprintf (stderr, "Error: no acref column found (%s)\n", acref); + return (-1); + } + + return (acol); +} + + +/** + * VOT_TYPECOLUMN -- Determine the type column for the given table. + */ +static int +vot_typeColumn (handle_t tab) +{ + register int i = 0; + handle_t field; + char *ucd; + + + /* Loop through the FIELDs to find the type. Use a generous match. + */ + if (tcol < 0) { + for (field=vot_getFIELD(tab); field; field=vot_getNext(field),i++) { + ucd = vot_getAttr (field, "ucd"); + if (ucd && strcasestr (ucd, fmt_ucd)) { + tcol = i; + break; + } + } + } + + return (tcol); +} + + +/** + * VOT_GETACLIST -- Download all the files for the specified thread. + */ +static void * +vot_getAclist (void *arg) +{ + register int i, j, done = 0, ret = 0; + int threadNum = 0; + + if (arg) + threadNum = *(int *)arg; + + for (i=0; i < nfiles; i++) { + if (aclist[i].tnum == threadNum) { + for (j=0; j < maxTrys; j++) + if ((ret = vot_getURL (aclist[i].url, aclist[i].fname))) + break; + done += ret; + } + } + + pthread_exit (NULL); +} + + +/** + * VOT_GETURL -- Utility routine to do a simple URL download to the file. + */ +static int +vot_getURL (char *url, char *ofname) +{ + int stat = 0; + char lockfile[SZ_FNAME], dot[SZ_FNAME], errBuf[CURL_ERROR_SIZE]; + char fname[SZ_FNAME]; + FILE *fd; + CURL *curl_handle; + + + /* Initialize the lock file. + */ + memset (lockfile, 0, SZ_FNAME); + memset (dot, 0, SZ_FNAME); + + sprintf (lockfile, ".%s.LOCK", ofname); + sprintf (dot, ".%s", ofname); + + if (access (lockfile, F_OK) == 0 && access (dot, F_OK) < 0) { + /* Download currently in progress, perhaps on another thread? + */ + return (0); + } else if (access (lockfile, F_OK) == 0 && access (dot, F_OK) == 0) { + /* Download complete, stray lockfile. + */ + unlink (lockfile); + } else if (access (lockfile, F_OK) < 0) { + /* No lock file, create one. + */ + creat (lockfile, O_CREAT); + } + + + /* Append filename extension if specified. + */ + if (extn) + sprintf (fname, "%s.%s", ofname, extn); + else + strcpy (fname, ofname); + + + /* For the CURL operation to download the file. + */ + curl_global_init (CURL_GLOBAL_ALL); /* init curl session */ + curl_handle = curl_easy_init (); + + if ((fd = fopen (fname, "wb")) == NULL) { /* open the output file */ + if (verbose) + fprintf (stderr, "Error: cannot open output file '%s'\n", fname); + curl_easy_cleanup (curl_handle); + return 0; + } + + /* Set cURL options + */ + curl_easy_setopt (curl_handle, CURLOPT_URL, url); + curl_easy_setopt (curl_handle, CURLOPT_NOPROGRESS, 1L); + curl_easy_setopt (curl_handle, CURLOPT_WRITEDATA, fd); + curl_easy_setopt (curl_handle, CURLOPT_ERRORBUFFER, errBuf); + + /* Do the download. + */ + if ((stat = curl_easy_perform (curl_handle)) != 0) { + /* Error in download, clean up. + */ + if (verbose) + fprintf (stderr, "Error: can't download '%s' : %s\n", url, errBuf); + unlink (fname); unlink (lockfile); + fclose (fd); /* close the file */ + curl_easy_cleanup (curl_handle); /* cleanup curl stuff */ + return (0); + } + + fclose (fd); /* close the file */ + curl_easy_cleanup (curl_handle); /* cleanup curl stuff */ + + /* Save the URL to a "dotfile" is we're downloading to a cache. + */ + if (isCache) { + if ((fd = fopen (dot, "w")) == NULL) { /* open cache file */ + if (verbose) + fprintf (stderr, "Error: cannot open cache file '%s'\n", dot); + return 0; + } + fprintf (fd, "%s\n", url); + fclose (fd); + } + + /* If we didn't specify an extension, try to determin the file type + * automatically. + */ + if (!extn) { + int dfd; + + if ((dfd = open (fname, O_RDONLY)) > 0) { + char buf[1024], new[SZ_FNAME]; + + (void) read (dfd, buf, 1024); + + memset (new, 0, SZ_FNAME); + if (strncmp ("SIMPLE", buf, 6) == 0) { /* FITS */ + sprintf (new, "%s.fits", fname); + rename (fname, new); + } + close (dfd); + } + } + + pthread_mutex_lock (&counter_mut); + ngot++; + if (verbose) { + fprintf (stderr, "Downloaded %d of %d files ....\r", ngot, nfiles); + fflush (stderr); + } + pthread_mutex_unlock (&counter_mut); + + /* Remove the lock file to indicate we are done. + */ + unlink (lockfile); + + return (1); +} + + +/** + * VOT_SUM32 -- Internet checksum, 32 bit unsigned integer version. + */ + +static unsigned int +vot_sum32 (char *str) +{ + register int i; + unsigned int *iarray, sum = 0; + int len, carry=0, newcarry=0; + + iarray = (unsigned int *) str; + len = strlen (str) / 4; + + for (i=0; i<len; i++) { + if (iarray[i] > ~ sum) + carry++; + sum += iarray[i]; + } + + while (carry) { + if (carry > ~ sum) + newcarry++; + sum += carry; + carry = newcarry; + newcarry = 0; + } + + return (sum); +} + + + +/****************************************************************************** +** Debug Utilities +******************************************************************************/ + +/** + * VOT_GETACLIST -- Download all the files for the specified thread. + */ +static void +vot_printAclist () +{ + register int i; + + fprintf (stderr, "\nAccess List: nfiles = %d\n", nfiles); + for (i=0; i < nfiles; i++) { + fprintf (stderr, "%2d: url='%20.20s...' fname='%s' tnum=%d\n", + i, aclist[i].url, aclist[i].fname, aclist[i].tnum); + } +} |