diff options
author | Joseph Hunkeler <jhunkeler@gmail.com> | 2015-07-08 20:46:52 -0400 |
---|---|---|
committer | Joseph Hunkeler <jhunkeler@gmail.com> | 2015-07-08 20:46:52 -0400 |
commit | fa080de7afc95aa1c19a6e6fc0e0708ced2eadc4 (patch) | |
tree | bdda434976bc09c864f2e4fa6f16ba1952b1e555 /sys/etc/urlget.x | |
download | iraf-linux-fa080de7afc95aa1c19a6e6fc0e0708ced2eadc4.tar.gz |
Initial commit
Diffstat (limited to 'sys/etc/urlget.x')
-rw-r--r-- | sys/etc/urlget.x | 384 |
1 files changed, 384 insertions, 0 deletions
# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.

include <syserr.h>
include <ctype.h>
include <mach.h>
include <fset.h>


# HTTP error codes we care about

define  HTTP_OK         200             # Success
define  HTTP_CREATED    201             # Created
define  HTTP_ACCEPTED   202             # Accepted
define  HTTP_PARTIAL    203             # Partial Information
define  HTTP_NORESP     204             # No Response

define  HTTP_MOVED      301             # Moved
define  HTTP_FOUND      302             # Found
define  HTTP_SEEOTHER   303             # Method
define  HTTP_NOTMOD     304             # Not Modified

define  HTTP_BADREQ     400             # Bad Request
define  HTTP_UNAUTH     401             # Unauthorized
define  HTTP_PAYMENT    402             # Payment Required
define  HTTP_FORBIDDEN  403             # Forbidden
define  HTTP_NOTFOUND   404             # Not Found

define  HTTP_INTERR     500             # Internal Error
define  HTTP_NOTIMP     501             # Not Implemented
define  HTTP_OVERLOAD   502             # Service Temporarily Overloaded
define  HTTP_GWTIMEOUT  503             # Gateway Timeout

define  SZ_BUF          8192            # download buffer
define  MAX_REDIRECTS   10              # max redirections followed per request

define  DBG_HDRS        FALSE



# URL_GET -- Do an HTTP GET on the given URL, save the results to the named
# file.  If a 'reply' pointer is given, return the request reply string (must
# be allocated at least SZ_PATHNAME).
#
# The function value is the number of chars saved to the file, or the
# negated HTTP status code when the server did not answer with HTTP_OK.
# An error is raised for any protocol other than "http".  Redirections
# (301/302/303 with a Location header) are followed at most MAX_REDIRECTS
# times; after that the negated redirect status is returned.

int procedure url_get (url, fname, reply)

char    url[ARB]                        #i URL to access
char    fname[ARB]                      #i local filename
pointer reply                           #u pointer to reply string

char    protocol[SZ_FNAME], host[SZ_FNAME], path[SZ_BUF], emsg[SZ_PATHNAME]
char    inurl[SZ_PATHNAME], outname[SZ_PATHNAME]
int     port, stat, nredirect
bool    redir
pointer buf

int     url_access(), strcmp()
bool    url_redirect()

define  redirect_       99

begin
        # Work on a private copy of the URL; a redirection rewrites it.
        call strcpy (url, inurl, SZ_PATHNAME)
        nredirect = 0

redirect_
        # Breakup the URL into usable pieces.
        call url_break (inurl, protocol, host, port, path)

        # Check for a supported protocol.
        if (strcmp (protocol, "http") != 0) {
            call aclrc (emsg, SZ_PATHNAME)
            call sprintf (emsg, SZ_PATHNAME, "Unsupported URI protocol (%s)")
                call pargstr (protocol)
            call error (0, emsg)
        }

        # Download the file to the given name.
        call strcpy (fname, outname, SZ_PATHNAME)

        if (reply == NULL) {
            # Caller does not want the reply; use a scratch buffer so the
            # redirection check still has the headers to look at.
            call calloc (buf, SZ_LINE, TY_CHAR)
            stat  = url_access (host, port, path, outname, buf)
            redir = url_redirect (stat, buf, inurl)
            call mfree (buf, TY_CHAR)
        } else {
            stat  = url_access (host, port, path, outname, reply)
            redir = url_redirect (stat, reply, inurl)
        }

        # Follow the redirection, but never loop forever on a cycle.
        if (redir) {
            nredirect = nredirect + 1
            if (nredirect <= MAX_REDIRECTS)
                goto redirect_
        }

        # URL Error Codes are returned as negative values, positive values
        # are the number of bytes read.  We let the caller decode the return
        # value, if desired, using the url_errcode() procedure.

        return (stat)
end


# URL_REDIRECT -- Check for a redirection reply code and modify the URL so
# we can try again.  Returns TRUE only when 'stat' is a negated 301/302/303,
# the reply holds a non-empty "Location:" value, and that value differs from
# the current URL.  On TRUE the 'url' argument has been overwritten with the
# new location (at most SZ_PATHNAME chars, the size used by url_get).

bool procedure url_redirect (stat, reply, url)

int     stat                            #i status code
pointer reply                           #i pointer to reply string
char    url[ARB]                        #u access url

int     code, loc, op
pointer ip
char    ch
char    inurl[SZ_LINE]

int     strsearch()
bool    streq()

begin
        code = - stat

        if (code != HTTP_MOVED && code != HTTP_FOUND && code != HTTP_SEEOTHER)
            return (FALSE)

        loc = strsearch (Memc[reply], "Location:")
        if (loc <= 0)
            return (FALSE)

        # Remember the URL we came from so a self-redirect can be detected.
        call strcpy (url, inurl, SZ_LINE)

        # Skip the whitespace in front of the header value.
        for (ip=reply+loc;  IS_WHITE(Memc[ip]);  ip=ip+1)
            ;

        # Copy the value up to the end of the header line.  Stop on CR, LF
        # or EOS so a "\r\n" terminator is stripped without eating a real
        # character, and never run past the reply or the output string.
        for (op=1;  op <= SZ_PATHNAME;  op=op+1) {
            ch = Memc[ip]
            if (ch == '\n' || ch == '\r' || ch == EOS)
                break
            url[op] = ch
            ip = ip + 1
        }

        # An empty Location leaves the URL untouched; nothing to follow.
        if (op == 1)
            return (FALSE)
        url[op] = EOS

        if (streq (inurl, url))
            return (FALSE)

        return (TRUE)
end


# URL_BREAK -- Break the URL into components needed to make the netpath.
# The output strings are filled to at most the sizes url_get declares for
# them (SZ_FNAME for protocol and host, SZ_BUF for path).  The port defaults
# to 80 and the path to "/" when the URL does not supply them.  A URL with
# no ':' yields the leading text as 'protocol' and an empty host.

procedure url_break (url, protocol, host, port, path)

char    url[SZ_BUF]                     #i url to parse
char    protocol[ARB]                   #o URL protocol (only HTTP, for now)
char    host[ARB]                       #o host name
int     port                            #o server port (if specified, or 80)
char    path[ARB]                       #o path part of URL, including args

int     i, nch, ip
char    ch
int     ctoi()

begin
        port = 80                       # set default port number

        # Pull out the protocol part of the URL.
        for (ip=1;  ip <= SZ_FNAME;  ip=ip+1) {
            ch = url[ip]
            if (ch == ':' || ch == EOS)
                break
            protocol[ip] = ch
        }
        protocol[ip] = EOS

        # Skip the "://" separator.
        while (url[ip] == ':' || url[ip] == '/')
            ip = ip + 1

        # Get the host name.
        for (i=1;  i <= SZ_FNAME;  i=i+1) {
            ch = url[ip]
            if (ch == ':' || ch == '/' || ch == EOS)
                break
            host[i] = ch
            ip = ip + 1
        }
        host[i] = EOS

        # Extract a port number if specified; keep the default when the
        # text after the ':' is not a number.
        if (url[ip] == ':') {
            ip = ip + 1
            nch = ctoi (url, ip, port)
            if (nch <= 0)
                port = 80
        }

        # Get the remaining path.
        for (i=1;  i <= SZ_BUF;  i=i+1) {
            ch = url[ip]
            if (ch == EOS)
                break
            path[i] = ch
            ip = ip + 1
        }
        path[i] = EOS

        # No path given (with or without a port): request the root.
        if (i == 1)
            call strcpy ("/", path, SZ_BUF)
end


# URL_ACCESS -- Do an HTTP GET of a resource to the named file.  The reply
# headers are copied to 'reply' (up to SZ_PATHNAME chars) when it is not
# NULL.  The function value is the number of chars written to 'fname', or
# the negated HTTP status when the status is not HTTP_OK (-HTTP_NOTFOUND if
# the host cannot be reached).  An error is raised if 'fname' already
# exists or cannot be created.  The network connection is closed on every
# exit path once it has been opened.

int procedure url_access (host, port, path, fname, reply)

char    host[ARB]                       #i host name
int     port                            #i server port number
char    path[ARB]                       #i resource path
char    fname[ARB]                      #i saved file path
pointer reply                           #i reply buffer

pointer rep
int     in, out, nchars, totchars, retcode, clen, ip
char    buf[SZ_BUF], netpath[SZ_PATHNAME], request[SZ_BUF], hd[SZ_LINE]

int     open(), access(), ndopen(), getline(), read(), strlen(), ctoi()
int     strncmp(), url_retcode()

begin
        # Connect to server on the given host.
        call sprintf (netpath, SZ_PATHNAME, "inet:%d:%s:%s")
            call pargi (port)
            call pargstr (host)
            call pargstr ("text")

        iferr (in = ndopen (netpath, READ_WRITE)) {
            call eprintf ("cannot access host '%s:%d'\n")
                call pargstr (host)
                call pargi (port)
            return (- HTTP_NOTFOUND)
        }

        # Format the request header.
        call aclrc (request, SZ_BUF)
        call sprintf (request, SZ_BUF, "GET %s HTTP/1.0\n")
            call pargstr (path)
        call strcat ("Accept: */*\n", request, SZ_BUF)
        call strcat ("User-Agent: IRAF/urlget\n", request, SZ_BUF)
        call strcat ("Host: ", request, SZ_BUF)
        call strcat ( host, request, SZ_BUF)
        call strcat ("\n", request, SZ_BUF)
        call strcat ("Connection: keep-alive\n\n", request, SZ_BUF)

        # Send the GET-url request to the server.
        nchars = strlen (request)
        call write (in, request, nchars)
        call flush (in)
        call fseti (in, F_CANCEL, OK)

        if (DBG_HDRS) {
            call eprintf ("request [%d]:\n%s\n")
                call pargi (nchars)
                call pargstr (request)
        }

        # Read the reply.  Read the HTTP header assuming it ends with a \n or
        # a \r\n. and then validate it will return the request correctly.
        # The line buffer is SZ_LINE chars, the most getline will deliver;
        # only the first SZ_PATHNAME chars of the headers are accumulated.
        clen = -1
        call calloc (rep, SZ_PATHNAME, TY_CHAR)
        repeat {
            call aclrc (hd, SZ_LINE)
            nchars = getline (in, hd)
            if (nchars <= 0)
                break
            call strcat (hd, Memc[rep], SZ_PATHNAME)
            if (strncmp (hd, "Content-Length:", 15) == 0) {
                ip = 16
                nchars = ctoi (hd, ip, clen)
            }
        } until ((hd[1] == '\r' && hd[2] == '\n') || (hd[1] == '\n'))

        if (DBG_HDRS) {
            call eprintf ("reply: %s\nclen = %d\n")
                call pargstr (Memc[rep])
                call pargi(clen)
        }

        # Make sure we have a valid file.
        retcode = url_retcode (Memc[rep])

        if (reply != NULL)
            call strcpy (Memc[rep], Memc[reply], SZ_PATHNAME)
        call mfree (rep, TY_CHAR)

        if (retcode != HTTP_OK) {
            call close (in)             # don't leak the connection
            return (- retcode)
        }

        # Open the named output file.
        if (access (fname, 0, 0) == YES) {
            call close (in)
            call syserrs (SYS_FCLOBBER, fname)
        }
        iferr (out = open (fname, NEW_FILE, TEXT_FILE)) {
            call close (in)
            call syserrs (SYS_FOPEN, fname)
        }

        # Now read the resource and save it to the named file.  Stop at end
        # of input, or once Content-Length chars have arrived when the
        # server told us the length.
        totchars = 0
        repeat {
            call aclrc (buf, SZ_BUF)
            nchars = read (in, buf, SZ_BUF)
            if (nchars <= 0)
                break
            call write (out, buf, nchars)
            call flush (out)
            totchars = totchars + nchars
        } until (clen > 0 && totchars >= clen)

        call close (in)                 # clean up
        call close (out)

        return (totchars)               # return number of chars read
end


# URL_RETCODE -- Get the return code from the HTTP header reply.  The code
# is the integer following the first whitespace-delimited token of the
# reply (the "HTTP/x.y" field).  HTTP_INTERR is returned when the reply is
# empty or carries no numeric status, so the caller always sees a defined
# error value.

int procedure url_retcode (reply)

char    reply[ARB]                      #i reply string

int     ip, code
int     ctoi()

begin
        # Skip the first token, stopping at the end of the string.
        for (ip=1;  reply[ip] != EOS && !IS_WHITE(reply[ip]);  ip=ip+1)
            ;

        if (ctoi (reply, ip, code) <= 0)
            return (HTTP_INTERR)

        return (code)
end


# URL_ERRCODE - Convert between an HTTP return code and the equivalent
# syserr() code value.

int procedure url_errcode (code)

int     code                            #i http return code

begin
        # Note: Not all error codes are implemented in syserr.  In this
        # case we just return the input code.

        switch (code) {
        case HTTP_OK:                   # Success
            ;
        case HTTP_CREATED:              # Created
            ;
        case HTTP_ACCEPTED:             # Accepted
            ;
        case HTTP_PARTIAL:              # Partial Information
            ;
        case HTTP_NORESP:               # No Response
            ;

        case HTTP_MOVED:                # Moved
            return (SYS_URLREDIRECT)
        case HTTP_FOUND:                # Found
            return (SYS_URLREDIRECT)
        case HTTP_SEEOTHER:             # See Other
            return (SYS_URLREDIRECT)
        case HTTP_NOTMOD:               # Not Modified
            ;

        case HTTP_BADREQ:               # Bad Request
            return (SYS_URLBADREQUEST)
        case HTTP_UNAUTH:               # Unauthorized
            ;
        case HTTP_PAYMENT:              # Payment Required
            ;
        case HTTP_FORBIDDEN:            # Forbidden
            return (SYS_URLFORBIDDEN)
        case HTTP_NOTFOUND:             # Not Found
            return (SYS_URLNOTFOUND)

        case HTTP_INTERR:               # Internal Error
            return (SYS_URLINTERROR)
        case HTTP_NOTIMP:               # Not Implemented
            ;
        case HTTP_OVERLOAD:             # Service Temporarily Overloaded
            ;
        case HTTP_GWTIMEOUT:            # Gateway Timeout
            ;
        }

        return (code)
end