diff options
author | Joseph Hunkeler <jhunkeler@gmail.com> | 2020-06-15 17:54:19 -0400 |
---|---|---|
committer | Joseph Hunkeler <jhunkeler@gmail.com> | 2020-06-15 17:54:19 -0400 |
commit | cd49c72d1dd9a0c6afff573ffd2ffaac73c2f0a0 (patch) | |
tree | 559ea1175909fcd9e26982a87c389333cc4e4210 /splitfits.c | |
parent | b783486d00d3b04330e401889cdd3b08864c4ae5 (diff) | |
download | splitfits-cd49c72d1dd9a0c6afff573ffd2ffaac73c2f0a0.tar.gz |
Comments
Diffstat (limited to 'splitfits.c')
-rw-r--r-- | splitfits.c | 139 |
1 files changed, 99 insertions, 40 deletions
diff --git a/splitfits.c b/splitfits.c index 004634b..7a7c3c6 100644 --- a/splitfits.c +++ b/splitfits.c @@ -89,6 +89,10 @@ struct DataFrame { size_t num_alloc; // Number of records allocated }; +/** + * Initialize `DataFrame` structure + * @return `DataFrame` + */ struct DataFrame *dataframe_init() { struct DataFrame *ctx; ctx = calloc(1, sizeof(struct DataFrame)); @@ -100,6 +104,10 @@ struct DataFrame *dataframe_init() { return ctx; } +/** + * Allocate another record for offsets in `start` and `stop` arrays + * @param ctx address of `DataFrame` structure + */ void dataframe_new(struct DataFrame **ctx) { size_t *tmp; tmp = realloc((*ctx)->start, ((*ctx)->num_alloc + 1) * sizeof(size_t *)); @@ -120,6 +128,12 @@ void dataframe_new(struct DataFrame **ctx) { (*ctx)->num_inuse += 2; } +/** + * Compile a listing of start/stop offsets. Write each chunk as an individual file. + * @param _filename FITS file + * @param dest path (may be NULL) + * @return + */ int split_file(const char *_filename, const char *dest) { FILE *fp_in; FILE *fp_out; @@ -132,26 +146,36 @@ int split_file(const char *_filename, const char *dest) { size_t block_size; size_t filepos; int i, done; - struct DataFrame *dataFrame, *hb; + struct DataFrame *dataFrame; - bytes_read = 0; - bytes_write = 0; i = 0; done = 0; block = calloc(FITS_BLOCK, sizeof(char)); fp_in = fopen(_filename, "rb"); + if (fp_in == NULL) { + perror(_filename); + exit(1); + } mapfile = _mapfile; - sprintf(mapfile, "%s/%s.map", dest ? dest : ".", _filename); + sprintf(mapfile, "%s/%s.part_map", dest ? dest : ".", _filename); - filepos = 0; block_size = FITS_BLOCK; dataFrame = dataframe_init(); size_t off; off = 0; + map_out = fopen(mapfile, "w+"); + if (map_out == NULL) { + perror(mapfile); + exit(1); + } + printf("Map: %s\n", mapfile); + + // Outer loop increments .partN file counter per iteration while (!done) { + // Read input file and collate header start/stop offets while (1) { filepos = ftell(fp_in); bytes_read = fread(block, sizeof(char), block_size, fp_in); @@ -167,48 +191,43 @@ int split_file(const char *_filename, const char *dest) { if (is_header_end(block)) { filepos = ftell(fp_in); dataFrame->stop[dataFrame->num_inuse] = filepos; + // Move on to next part break; } } - off += 2; + // Allocate a new record when we are NOT DONE reading the input file if (!done) { - dataframe_new(&dataFrame); + dataframe_new(&dataFrame); // allocates TWO records per call } } - size_t last; + // Fill in ODD record gaps (EVEN records are header data) with data between headers for (off = 0; off < dataFrame->num_inuse; off += 2) { + // Ignore first pair because it's guaranteed to be a header if (off < 2) { continue; } - size_t size; - size = dataFrame->start[off] - dataFrame->stop[off - 2]; - last = off - 1; - - dataFrame->start[last] = dataFrame->stop[off - 2]; - dataFrame->stop[last] = dataFrame->start[off]; + // Assign ODD offsets + dataFrame->start[off - 1] = dataFrame->stop[off - 2]; + dataFrame->stop[off - 1] = dataFrame->start[off]; } + // Assign final offset leading up the end of the file dataFrame->start[off - 1] = dataFrame->stop[off - 2]; fseek(fp_in, 0L, SEEK_END); dataFrame->stop[off - 1] = ftell(fp_in); - printf("info:\n"); - for (size_t d = 0; d < dataFrame->num_inuse; d++) { - printf("%zu: start: %zu, stop: %zu\n", d, dataFrame->start[d], dataFrame->stop[d]); - } - - done = 0; + // Reuse input file handle rewind(fp_in); - size_t data_size; - data_size = 0; + // Read offset from the input files and write it to its respective .part_N file for (off = 0; off < dataFrame->num_inuse; off++) { char path[PATH_MAX]; char filename[PATH_MAX]; char *ext; + // Get dirname of input path strcpy(path, _filename); if (dest != NULL) { strcpy(path, dest); @@ -216,35 +235,51 @@ int split_file(const char *_filename, const char *dest) { get_dirname(path); } + // Get basename of input file strcpy(filename, _filename); get_basename(filename); + // When the basename and dirname are the same, use the current working directory path if (strcmp(path, filename) == 0) { strcpy(path, "."); } + // Create output file name sprintf(outfile, "%s/%s", path, filename); + + // Strip file extension from output file if ((ext = strrchr(outfile, '.')) == NULL) { fprintf(stderr, "%s: does not have an extension\n", outfile); } else { *ext = '\0'; } + // When headers physically border one another, this can happen if (dataFrame->start[off] == dataFrame->stop[off]) { printf("skipped %d: identical begin/end offset\n", i); i++; continue; } + // Finalize output file name sprintf(outfile + strlen(outfile), ".part_%d", i); - printf("creating %s\n", outfile); + + printf("Creating: %s\n", outfile); fp_out = fopen(outfile, "w+b"); + if (fp_out == NULL) { + perror(outfile); + exit(1); + } - filepos = ftell(fp_in); block_size = FITS_BLOCK; - fprintf(map_out, "%zu:", filepos); + // Seek to first offset (probably zero) fseek(fp_in, dataFrame->start[off], SEEK_SET); + filepos = ftell(fp_in); + // Write start offset to map + fprintf(map_out, "%zu:", filepos); + + // Read (input) / write (part) for each offset in the data frame while(1) { filepos = ftell(fp_in); @@ -254,7 +289,6 @@ int split_file(const char *_filename, const char *dest) { bytes_read = fread(block, sizeof(char), block_size, fp_in); if (bytes_read < 1) { - done = 1; break; } @@ -267,18 +301,29 @@ int split_file(const char *_filename, const char *dest) { fclose(fp_out); - char *bname = get_basename(outfile); + // Record output file offset and basename in the map + char *bname; + bname = get_basename(outfile); fprintf(map_out, "%zu:%s\n", filepos, bname); + + // Next part i++; } fclose(map_out); return 0; } +/** + * Reconstruct a file using a .part_map file + * @param _filename path to .part_map file + * @param dest path to store reconstructed file (may be NULL) + * @return + */ int combine_file(const char *_filename, const char *dest) { - size_t bytes; char buffer[PATH_MAX]; + char path[PATH_MAX]; char outfile[PATH_MAX]; + char *filename; char *block; char *ext; FILE *fp_in; @@ -292,9 +337,6 @@ int combine_file(const char *_filename, const char *dest) { exit(1); } - char *filename; - char path[PATH_MAX]; - filename = calloc(PATH_MAX, sizeof(char)); strcpy(filename, _filename); @@ -325,18 +367,21 @@ int combine_file(const char *_filename, const char *dest) { char *name; FILE *fp_tmp; + // Get .part_N file name mark = strrchr(buffer, ':'); if (mark != NULL) { mark++; name = strdup(mark); } + // Open .part_N for reading fp_tmp = fopen(name, "r"); if (fp_tmp == NULL) { perror(name); exit(1); } + // Append .part_N data to the output file sequentially printf("Reading: %s\n", name); while (1) { if (fread(block, sizeof(char), FITS_BLOCK, fp_tmp) < 1) { @@ -361,25 +406,26 @@ int main(int argc, char *argv[]) { char *prog; char *outdir; - prog = strrchr(argv[0], '/'); - outdir = NULL; + // Set program name + prog = get_basename(argv[0]); - if (prog == NULL) { - prog = argv[0]; - } else { - prog++; - } + // Output directory (default of NULL indicates "current directory"); + outdir = NULL; + // Check program argument count if (argc < 2) { printf("usage: %s [-o DIR] {[-c MAP_FILE] | FILE(s)}\n", prog); printf(" Options:\n"); - printf(" -c --combine Reconstruct original file using .map data\n"); + printf(" -c --combine Reconstruct original file using .part_map data\n"); printf(" -o --outdir Path where output files are stored\n"); exit(1); } + + // Parse program arguments size_t inputs; for (inputs = 1; inputs < argc; inputs++) { + // User-defined output directory if (strcmp(argv[inputs], "-o") == 0 || strcmp(argv[inputs], "--outdir") == 0) { inputs++; if (access(argv[inputs], R_OK | W_OK | X_OK) != 0) { @@ -389,6 +435,7 @@ int main(int argc, char *argv[]) { continue; } + // User wants to reconstruct a FITS file using a .part_map if (strcmp(argv[inputs], "-c") == 0 || strcmp(argv[inputs], "--combine") == 0) { if (argc < 3) { fprintf(stderr, "-c|--combine requires an argument (MAP file)"); @@ -396,20 +443,28 @@ int main(int argc, char *argv[]) { } int combine; + const char *map_file; + + // Shift to next argument inputs++; - const char *map_file = argv[inputs]; + // Get part map filename + map_file = argv[inputs]; + + // Make sure it exists if (access(map_file, F_OK) != 0) { fprintf(stderr, "%s: data file does not exist\n", map_file); exit(1); } + // Reconstruct FITS file combine = combine_file(map_file, outdir); exit(combine); } break; } + // Make sure all input files exist bad_files = 0; for (size_t i = inputs; i < argc; i++) { if (access(argv[i], F_OK) != 0) { @@ -418,20 +473,24 @@ int main(int argc, char *argv[]) { } } + // If not all input files exist, then die if (bad_files) { fprintf(stderr, "Exiting...\n"); exit(1); } + // Make sure user-defined output directory exists if (outdir != NULL && access(outdir, F_OK) != 0) { fprintf(stderr, "%s: %s\n", outdir, strerror(errno)); exit(1); } + // Split all input files for (size_t i = inputs; i < argc; i++) { split_file(argv[i], outdir); } + // Clean up if (outdir != NULL) { free(outdir); } |