/** * VOTGET -- Download all access references in a VOTable. * * Usage: * * votget [-b ] [-c ] [-u ] [-v] [-x] [-o fname] * * Where * -b Base output filename * -e [] Extension to add to filename (or auto) * -f Download only specified * -h Print help summary * -s Use sequential file numbers * -t Input file is temporary, delete when done * -u Use ucd to identify acref column * * -o Output extracted filename (or 'stdout' or '-') * -v Verbose output * -x Extract access references * * -A Col number to use as acref column (0-indexed) * -B Background, i.e. run in forked child process * -C Cache the downloaded file * -D Set download directory * -F Col number to use as format column (0-indexed) * -N Number of simultaneous downloads * * +d debug output * * Name of file to process, or '-' for stdin */ int votget (int argc, char **argv); /** * Program Main. This is just a wrapper around the interface routine. */ int main (int argc, char **argv) { return votget (argc, argv); } /************************************************************************ * * * VOTGET -- Download the access references in a VOTable. * * * ************************************************************************/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "votParse.h" #define SZ_FNAME 256 /* max size of a file name */ #define SZ_URL 4096 /* max URL size */ #define MIN_THREADS 4 /* min no. simultaneous thread */ #define MAX_THREADS 64 /* max no. simultaneous threads */ #define MAX_DOWNLOADS 4096 /* max no. files to download */ #define MAX_TRYS 3 /* max download attempts */ #define NAXIS_UCD "VOX:Image_Naxis" #define NAXES_UCD "VOX:Image_Naxes" #define SCALE_UCD "VOX:Image_Scale" #define ACREF_UCD "VOX:Image_AccessReference" #define FORMAT_UCD "VOX:Image_Format" int vot = 0; /* VOTable handle */ int verbose = 0; /* verbose parameter */ int debug = 0; /* debug flag */ int extract = 0; /* extract references only */ int detach = 0; /* run as detached process */ int nfiles = 0; /* number of download files */ int ngot = 0; /* number of files downloaded */ int seq = 0; /* use sequential file numbers */ int isCache = 0; /* is this a cache file? */ int isTemp = 0; /* is this a temp file? */ int acol = -1; /* access reference column */ int tcol = -1; /* image type column */ int nthreads = MIN_THREADS; /* number of download threads */ int maxTrys = MAX_TRYS; /* download attempts */ char *base = "file"; /* output base filename */ char *extn = NULL; /* output filename extension */ char *dir = NULL; /* download directory */ char *afname = NULL; /* output acref filename */ char *acref = NULL; /* acref url */ char *acref_ucd = NULL; /* acref UCD */ char *fmt = NULL; /* image format */ char *fmt_ucd = NULL; /* image format UCD */ FILE *afd = (FILE *) NULL; /* acref file descriptor */ pthread_mutex_t counter_mut = PTHREAD_MUTEX_INITIALIZER; typedef struct { char url[SZ_URL]; /* access URL */ char fname[SZ_URL]; /* local filename */ int tnum; /* worker thread number */ } Acref, *AcrefP; Acref aclist[MAX_DOWNLOADS]; /* access list */ /* Private methods. */ static int vot_isVOTable (char *infile); static int vot_acrefColumn (handle_t tab); static int vot_typeColumn (handle_t tab); static int vot_getURL (char *url, char *ofname); static int vot_loadText (char *infile); static int vot_loadVOTable (char *infile); static void vot_saveAcref (char *acref, int num); static void *vot_getAclist (void *arg); static void vot_printAclist (); static void vot_Usage (); static unsigned int vot_sum32 (char *str); /** * Program entry point. */ int votget (int argc, char **argv) { int i, stat = OK; char *fname; if (argc < 2) { vot_Usage (); return (1); } else if (argc >= 2) { for (i=1; i < argc; i++) { if (argv[i][0] == '-' && strlen (argv[i]) > 1) { switch (argv[i][1]) { case 'h': vot_Usage (); return (0); case 'b': base = argv[++i]; break; case 'f': fmt = argv[++i]; break; case 's': seq++; break; case 't': isTemp++; break; case 'u': acref_ucd = argv[++i]; break; case 'o': afname = argv[++i]; break; case 'v': verbose++; break; case 'x': extract++; break; case 'A': acol = atoi(argv[++i]); break; case 'B': detach++; break; case 'C': isCache++; break; case 'D': dir = argv[++i]; break; case 'F': tcol = atoi(argv[++i]); break; case 'N': nthreads = atoi(argv[++i]); break; default: fprintf (stderr, "Invalid argument '%c'\n", argv[i][1]); return (1); } } else if (argv[i][0] == '+' && strlen (argv[i]) > 1) { switch (argv[i][1]) { case 'd': debug++; break; } } else fname = argv[i]; } } /* Setup defaults and initialize. */ memset (&aclist[0], 0, MAX_DOWNLOADS); if (!fmt_ucd) fmt_ucd = FORMAT_UCD; if (!acref_ucd) acref_ucd = ACREF_UCD; if (afname && (afd = fopen (afname, "w+")) == (FILE *) NULL) { if (verbose) fprintf (stderr, "Error: cannot open output file '%s'\n", afname); return (ERR); } /* Determine the type of input file. */ switch ((vot = vot_isVOTable (fname))) { case -1: stat = ERR; goto done_; case 0: vot_loadText (fname); break; case 1: vot_loadVOTable (fname); break; } /* If all we're doing is extracting the URLs we can quit now. */ if (extract) goto done_; if (debug) { fprintf (stderr, "acol = %d tcol = %d\n", acol, tcol); fprintf (stderr, "Downloading %d files ....\n", nfiles); vot_printAclist (); } /* If we've been asked to detach, fork off to do the downloads in * a child, and return to the caller. */ if (detach) { pid_t pid; switch ((pid = fork ())) { case -1: return (ERR); /* We are an error */ case 0: break; /* We are the child */ default: return (OK); /* We are the parent */ } } /* Initialize the download directory. */ if (dir) { if (access (dir, F_OK) < 0) mkdir (dir, 0644); if (access (dir, W_OK) < 0) { if (verbose) fprintf (stderr, "Error: Cannot write to directory '%s'\n", dir); return (1); } chdir (dir); } /* Do the downloads. */ if (nfiles < MIN_THREADS) nthreads = nfiles; if (nthreads == 1) { vot_getAclist (NULL); } else { /* Spawn the worker threads. */ int rc = 0, tc = 0, status = 0, tnum[MAX_THREADS]; pthread_attr_t attr; /* thread attributes */ pthread_t thread[MAX_THREADS]; /* Initialize the service processing thread attributes and run 'em. */ pthread_attr_init (&attr); pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_JOINABLE); if (verbose) fprintf (stderr, "Starting download ....\r"); for (tc=0; tc < nthreads; tc++) { tnum[tc] = tc; if ((rc = pthread_create (&thread[tc], &attr, vot_getAclist, (void *)&tnum[tc]))) { fprintf (stderr, "ERROR: pthread_create() fails, code=%d\n", rc); exit (-1); } } /* Free attribute and wait for the threads to complete. */ pthread_attr_destroy (&attr); for (tc=0; tc < nthreads; tc++) { if ((rc = pthread_join (thread[tc], (void **)&status)) ) { if (rc != ESRCH) { fprintf (stderr, "ERROR: pthread_join() fails, code=%d status=%d\n", rc, status); exit (-1); } } } if (verbose) { fprintf (stderr, "Downloaded %d files -- Download complete\n", nfiles); fflush (stderr); } } /* Remove input file if it is temporary. */ if (isTemp) unlink (fname); /* Close the table and clean up. */ done_: return (OK); } /********************************************************************** ** Private Procedures **********************************************************************/ /** * VOT_USAGE -- Print the task usage and exit. */ static void vot_Usage () { fprintf (stderr, "\n" " Usage:\n" "\n" " votget [-b ] [-c ] [-u ] [-v] [-x] [-o fname] \n" "\n" " Where\n" " -b Base output filename\n" " -e [] Extension to add to filename (or auto)\n" " -f Download only specified \n" " -h Print help summary\n" " -s Use sequential file numbers\n" " -t Input file is temporary, delete when done\n" " -u Use ucd to identify acref column\n" "\n" " -o Output extracted filename (or 'stdout' or\n" " '-')\n" " -v Verbose output\n" " -x Extract access references\n" "\n" " -A Col number to use as acref column (0-indexed)\n" " -B Background, i.e. run in forked child process\n" " -C Cache the downloaded file\n" " -D Set download directory\n" " -F Col number to use as format column\n" " (0-indexed)\n" " -N Number of simultaneous downloads\n" "\n" " +d debug output\n" "\n" " Name of file to process, or '-' for stdin\n" ); } /** * VOT_LOADTEXT -- Load the access list from a text file. We assume the * list is simply one url per line. */ static int vot_loadText (char *infile) { int i = 0, fd, nread = 0, tnum = 0, sz = 0; char *acref, *buf, *ip; struct stat info; if ((fd = open (infile, O_RDONLY)) < 0) return (-1); if (stat (infile, &info) < 0) /* read the whole file */ return (-1); sz = info.st_size; buf = calloc (sz + 1, sizeof(char)); nread = read (fd, buf, sz); close (fd); acref = buf; /* point to 1st url */ for (i=0; *acref; i++) { for (ip=acref; *ip && *ip != '\n'; ip++) ; *ip = '\0'; vot_saveAcref (acref, i); acref = ip + 1; nfiles++; tnum++; } if (afd) /* close the acref file */ fclose (afd); return (nfiles); } /** * VOT_LOADVOTABLE -- Load the access list from a VOTable. */ static int vot_loadVOTable (char *infile) { int i, tnum = 0; int res, tab, data, tdata, tr; char *acref; /* Open the table. This also parses it. */ if ( (vot = vot_openVOTABLE (infile) ) <= 0) { if (verbose) fprintf (stderr, "Error opening VOTable '%s'\n", infile); return (ERR); } /* Loop over all the resources in the file. In most cases there will * only be one , if not then the selection applies to all * valid tables. */ for (res = vot_getRESOURCE (vot); res; res = vot_getNext (res)) { /* Get the element. */ if (! (tab = vot_getTABLE (res))) { if (verbose) fprintf (stderr, "Error: No
in \n"); continue; } data = vot_getDATA (tab); tdata = vot_getTABLEDATA (data); /* Loop through the FIELDs to find the acref. Let the cmdline param * override the acref column ucd. */ acol = (acol < 0 ? vot_acrefColumn (tab) : acol); tcol = (tcol < 0 ? vot_typeColumn (tab) : tcol); /* Now scan the data table for acrefs. We got the acref column above * so lookup the table cell directly for each row, either printing * out the acref for a simple extract, or by adding to the access * list to be processed below. */ i = 0; for (tr=vot_getTR (tdata); tr; tr=vot_getNext(tr)) { acref = vot_getTableCell (tdata, i, acol); if (tcol >= 0) { char *format = vot_getTableCell (tdata, i, tcol); if (format && fmt && strcasestr (format, fmt) == NULL) continue; } vot_saveAcref (acref, i); nfiles++; tnum++; i++; } } vot_closeVOTABLE (vot); if (afd) /* close the acref file */ fclose (afd); return (nfiles); } /** * VOT_SAVEACREF -- Save the URL to the access list. */ static void vot_saveAcref (char *acref, int num) { if (afd) fprintf (afd, "%s\n", acref); else if (extract) printf ("%s\n", acref); else { /* Save to the access list. */ aclist[num].tnum = ((nthreads == 1) ? 0 : (num % nthreads)); strcpy (aclist[num].url, acref); sprintf (aclist[num].fname, "%s%u", base, (seq ? num : vot_sum32 (acref)) ); } } /** * VOT_ISVOTABLE -- Determine in the input file is a VOTable or URL @file. * We return zero if the file cannot be parsed as a valid VOTable (i.e. * we assume it is an @file of URLs), or else we return the root handle to * the parsed file. */ #define SZ_READ 2880 static int vot_isVOTable (char *infile) { FILE *fd = (FILE *) NULL; char buf[SZ_READ]; register int nread;; /* read the first 1024 bytes and search for a 'votable' string... */ if (access (infile, F_OK) < 0) { if (verbose) fprintf (stderr, "Error: Cannot open input file '%s'\n", infile); return (-1); } else if ((fd = fopen (infile, "r"))) { memset (buf, 0, SZ_READ); nread = fread (buf, sizeof (char), SZ_READ, fd); fclose (fd); return (strcasestr (buf, "votable") ? 1 : 0); } return ( 0 ); } /** * VOT_ACREFCOLUMN -- Determine the access column for the given table. */ static int vot_acrefColumn (handle_t tab) { register int i = 0, acol = -1; handle_t field; char *ucd; /* Loop through the FIELDs to find the acref. */ for (field=vot_getFIELD(tab); field; field=vot_getNext(field),i++) { ucd = vot_getAttr (field, "ucd"); if (ucd && strcasecmp (acref_ucd, ucd) == 0) { acol = i; break; } } if (acol < 0) { /* make sure we found a column */ if (verbose) fprintf (stderr, "Error: no acref column found (%s)\n", acref); return (-1); } return (acol); } /** * VOT_TYPECOLUMN -- Determine the type column for the given table. */ static int vot_typeColumn (handle_t tab) { register int i = 0; handle_t field; char *ucd; /* Loop through the FIELDs to find the type. Use a generous match. */ if (tcol < 0) { for (field=vot_getFIELD(tab); field; field=vot_getNext(field),i++) { ucd = vot_getAttr (field, "ucd"); if (ucd && strcasestr (ucd, fmt_ucd)) { tcol = i; break; } } } return (tcol); } /** * VOT_GETACLIST -- Download all the files for the specified thread. */ static void * vot_getAclist (void *arg) { register int i, j, done = 0, ret = 0; int threadNum = 0; if (arg) threadNum = *(int *)arg; for (i=0; i < nfiles; i++) { if (aclist[i].tnum == threadNum) { for (j=0; j < maxTrys; j++) if ((ret = vot_getURL (aclist[i].url, aclist[i].fname))) break; done += ret; } } pthread_exit (NULL); } /** * VOT_GETURL -- Utility routine to do a simple URL download to the file. */ static int vot_getURL (char *url, char *ofname) { int stat = 0; char lockfile[SZ_FNAME], dot[SZ_FNAME], errBuf[CURL_ERROR_SIZE]; char fname[SZ_FNAME]; FILE *fd; CURL *curl_handle; /* Initialize the lock file. */ memset (lockfile, 0, SZ_FNAME); memset (dot, 0, SZ_FNAME); sprintf (lockfile, ".%s.LOCK", ofname); sprintf (dot, ".%s", ofname); if (access (lockfile, F_OK) == 0 && access (dot, F_OK) < 0) { /* Download currently in progress, perhaps on another thread? */ return (0); } else if (access (lockfile, F_OK) == 0 && access (dot, F_OK) == 0) { /* Download complete, stray lockfile. */ unlink (lockfile); } else if (access (lockfile, F_OK) < 0) { /* No lock file, create one. */ creat (lockfile, O_CREAT); } /* Append filename extension if specified. */ if (extn) sprintf (fname, "%s.%s", ofname, extn); else strcpy (fname, ofname); /* For the CURL operation to download the file. */ curl_global_init (CURL_GLOBAL_ALL); /* init curl session */ curl_handle = curl_easy_init (); if ((fd = fopen (fname, "wb")) == NULL) { /* open the output file */ if (verbose) fprintf (stderr, "Error: cannot open output file '%s'\n", fname); curl_easy_cleanup (curl_handle); return 0; } /* Set cURL options */ curl_easy_setopt (curl_handle, CURLOPT_URL, url); curl_easy_setopt (curl_handle, CURLOPT_NOPROGRESS, 1L); curl_easy_setopt (curl_handle, CURLOPT_WRITEDATA, fd); curl_easy_setopt (curl_handle, CURLOPT_ERRORBUFFER, errBuf); /* Do the download. */ if ((stat = curl_easy_perform (curl_handle)) != 0) { /* Error in download, clean up. */ if (verbose) fprintf (stderr, "Error: can't download '%s' : %s\n", url, errBuf); unlink (fname); unlink (lockfile); fclose (fd); /* close the file */ curl_easy_cleanup (curl_handle); /* cleanup curl stuff */ return (0); } fclose (fd); /* close the file */ curl_easy_cleanup (curl_handle); /* cleanup curl stuff */ /* Save the URL to a "dotfile" is we're downloading to a cache. */ if (isCache) { if ((fd = fopen (dot, "w")) == NULL) { /* open cache file */ if (verbose) fprintf (stderr, "Error: cannot open cache file '%s'\n", dot); return 0; } fprintf (fd, "%s\n", url); fclose (fd); } /* If we didn't specify an extension, try to determin the file type * automatically. */ if (!extn) { int dfd; if ((dfd = open (fname, O_RDONLY)) > 0) { char buf[1024], new[SZ_FNAME]; (void) read (dfd, buf, 1024); memset (new, 0, SZ_FNAME); if (strncmp ("SIMPLE", buf, 6) == 0) { /* FITS */ sprintf (new, "%s.fits", fname); rename (fname, new); } close (dfd); } } pthread_mutex_lock (&counter_mut); ngot++; if (verbose) { fprintf (stderr, "Downloaded %d of %d files ....\r", ngot, nfiles); fflush (stderr); } pthread_mutex_unlock (&counter_mut); /* Remove the lock file to indicate we are done. */ unlink (lockfile); return (1); } /** * VOT_SUM32 -- Internet checksum, 32 bit unsigned integer version. */ static unsigned int vot_sum32 (char *str) { register int i; unsigned int *iarray, sum = 0; int len, carry=0, newcarry=0; iarray = (unsigned int *) str; len = strlen (str) / 4; for (i=0; i ~ sum) carry++; sum += iarray[i]; } while (carry) { if (carry > ~ sum) newcarry++; sum += carry; carry = newcarry; newcarry = 0; } return (sum); } /****************************************************************************** ** Debug Utilities ******************************************************************************/ /** * VOT_GETACLIST -- Download all the files for the specified thread. */ static void vot_printAclist () { register int i; fprintf (stderr, "\nAccess List: nfiles = %d\n", nfiles); for (i=0; i < nfiles; i++) { fprintf (stderr, "%2d: url='%20.20s...' fname='%s' tnum=%d\n", i, aclist[i].url, aclist[i].fname, aclist[i].tnum); } }