diff options
| -rw-r--r-- | splitfits.c | 139 | 
1 files changed, 99 insertions, 40 deletions
| diff --git a/splitfits.c b/splitfits.c index 004634b..7a7c3c6 100644 --- a/splitfits.c +++ b/splitfits.c @@ -89,6 +89,10 @@ struct DataFrame {      size_t num_alloc;   // Number of records allocated  }; +/** + * Initialize `DataFrame` structure + * @return `DataFrame` + */  struct DataFrame *dataframe_init() {      struct DataFrame *ctx;      ctx = calloc(1, sizeof(struct DataFrame)); @@ -100,6 +104,10 @@ struct DataFrame *dataframe_init() {      return ctx;  } +/** + * Allocate another record for offsets in `start` and `stop` arrays + * @param ctx address of `DataFrame` structure + */  void dataframe_new(struct DataFrame **ctx) {      size_t *tmp;      tmp = realloc((*ctx)->start, ((*ctx)->num_alloc + 1) * sizeof(size_t *)); @@ -120,6 +128,12 @@ void dataframe_new(struct DataFrame **ctx) {      (*ctx)->num_inuse += 2;  } +/** + * Compile a listing of start/stop offsets. Write each chunk as an individual file. + * @param _filename FITS file + * @param dest path (may be NULL) + * @return + */  int split_file(const char *_filename, const char *dest) {      FILE *fp_in;      FILE *fp_out; @@ -132,26 +146,36 @@ int split_file(const char *_filename, const char *dest) {      size_t block_size;      size_t filepos;      int i, done; -    struct DataFrame *dataFrame, *hb; +    struct DataFrame *dataFrame; -    bytes_read = 0; -    bytes_write = 0;      i = 0;      done = 0;      block = calloc(FITS_BLOCK, sizeof(char));      fp_in = fopen(_filename, "rb"); +    if (fp_in == NULL) { +        perror(_filename); +        exit(1); +    }      mapfile = _mapfile; -    sprintf(mapfile, "%s/%s.map", dest ? dest : ".", _filename); +    sprintf(mapfile, "%s/%s.part_map", dest ? dest : ".", _filename); -    filepos = 0;      block_size = FITS_BLOCK;      dataFrame = dataframe_init();      size_t off;      off = 0; +      map_out = fopen(mapfile, "w+"); +    if (map_out == NULL) { +        perror(mapfile); +        exit(1); +    } +    printf("Map: %s\n", mapfile); + +    // Outer loop increments .partN file counter per iteration      while (!done) { +        // Read input file and collate header start/stop offets          while (1) {              filepos = ftell(fp_in);              bytes_read = fread(block, sizeof(char), block_size, fp_in); @@ -167,48 +191,43 @@ int split_file(const char *_filename, const char *dest) {              if (is_header_end(block)) {                  filepos = ftell(fp_in);                  dataFrame->stop[dataFrame->num_inuse] = filepos; +                // Move on to next part                  break;              }          } -        off += 2; +        // Allocate a new record when we are NOT DONE reading the input file          if (!done) { -            dataframe_new(&dataFrame); +            dataframe_new(&dataFrame); // allocates TWO records per call          }      } -    size_t last; +    // Fill in ODD record gaps (EVEN records are header data) with data between headers      for (off = 0; off < dataFrame->num_inuse; off += 2) { +        // Ignore first pair because it's guaranteed to be a header          if (off < 2) {              continue;          } -        size_t size; -        size = dataFrame->start[off] - dataFrame->stop[off - 2]; -        last = off - 1; - -        dataFrame->start[last] = dataFrame->stop[off - 2]; -        dataFrame->stop[last] = dataFrame->start[off]; +        // Assign ODD offsets +        dataFrame->start[off - 1] = dataFrame->stop[off - 2]; +        dataFrame->stop[off - 1] = dataFrame->start[off];      } +    // Assign final offset leading up the end of the file      dataFrame->start[off - 1] = dataFrame->stop[off - 2];      fseek(fp_in, 0L, SEEK_END);      dataFrame->stop[off - 1] = ftell(fp_in); -    printf("info:\n"); -    for (size_t d = 0; d < dataFrame->num_inuse; d++) { -        printf("%zu: start: %zu, stop: %zu\n", d, dataFrame->start[d], dataFrame->stop[d]); -    } - -    done = 0; +    // Reuse input file handle      rewind(fp_in); -    size_t data_size; -    data_size = 0; +    // Read offset from the input files and write it to its respective .part_N file      for (off = 0; off < dataFrame->num_inuse; off++) {          char path[PATH_MAX];          char filename[PATH_MAX];          char *ext; +        // Get dirname of input path          strcpy(path, _filename);          if (dest != NULL) {              strcpy(path, dest); @@ -216,35 +235,51 @@ int split_file(const char *_filename, const char *dest) {              get_dirname(path);          } +        // Get basename of input file          strcpy(filename, _filename);          get_basename(filename); +        // When the basename and dirname are the same, use the current working directory path          if (strcmp(path, filename) == 0) {              strcpy(path, ".");          } +        // Create output file name          sprintf(outfile, "%s/%s", path, filename); + +        // Strip file extension from output file          if ((ext = strrchr(outfile, '.')) == NULL) {              fprintf(stderr, "%s: does not have an extension\n", outfile);          } else {              *ext = '\0';          } +        // When headers physically border one another, this can happen          if (dataFrame->start[off] == dataFrame->stop[off]) {              printf("skipped %d: identical begin/end offset\n", i);              i++;              continue;          } +        // Finalize output file name          sprintf(outfile + strlen(outfile), ".part_%d", i); -        printf("creating %s\n", outfile); + +        printf("Creating: %s\n", outfile);          fp_out = fopen(outfile, "w+b"); +        if (fp_out == NULL) { +            perror(outfile); +            exit(1); +        } -        filepos = ftell(fp_in);          block_size = FITS_BLOCK; -        fprintf(map_out, "%zu:", filepos); +        // Seek to first offset (probably zero)          fseek(fp_in, dataFrame->start[off], SEEK_SET); +        filepos = ftell(fp_in); +        // Write start offset to map +        fprintf(map_out, "%zu:", filepos); + +        // Read (input) / write (part) for each offset in the data frame          while(1) {              filepos = ftell(fp_in); @@ -254,7 +289,6 @@ int split_file(const char *_filename, const char *dest) {              bytes_read = fread(block, sizeof(char), block_size, fp_in);              if (bytes_read < 1) { -                done = 1;                  break;              } @@ -267,18 +301,29 @@ int split_file(const char *_filename, const char *dest) {          fclose(fp_out); -        char *bname = get_basename(outfile); +        // Record output file offset and basename in the map +        char *bname; +        bname = get_basename(outfile);          fprintf(map_out, "%zu:%s\n", filepos, bname); + +        // Next part          i++;      }      fclose(map_out);      return 0;  } +/** + * Reconstruct a file using a .part_map file + * @param _filename path to .part_map file + * @param dest path to store reconstructed file (may be NULL) + * @return + */  int combine_file(const char *_filename, const char *dest) { -    size_t bytes;      char buffer[PATH_MAX]; +    char path[PATH_MAX];      char outfile[PATH_MAX]; +    char *filename;      char *block;      char *ext;      FILE *fp_in; @@ -292,9 +337,6 @@ int combine_file(const char *_filename, const char *dest) {          exit(1);      } -    char *filename; -    char path[PATH_MAX]; -      filename = calloc(PATH_MAX, sizeof(char));      strcpy(filename, _filename); @@ -325,18 +367,21 @@ int combine_file(const char *_filename, const char *dest) {          char *name;          FILE *fp_tmp; +        // Get .part_N file name          mark = strrchr(buffer, ':');          if (mark != NULL) {              mark++;              name = strdup(mark);          } +        // Open .part_N for reading          fp_tmp = fopen(name, "r");          if (fp_tmp == NULL) {              perror(name);              exit(1);          } +        // Append .part_N data to the output file sequentially          printf("Reading: %s\n", name);          while (1) {              if (fread(block, sizeof(char), FITS_BLOCK, fp_tmp) < 1) { @@ -361,25 +406,26 @@ int main(int argc, char *argv[]) {      char *prog;      char *outdir; -    prog = strrchr(argv[0], '/'); -    outdir = NULL; +    // Set program name +    prog = get_basename(argv[0]); -    if (prog == NULL) { -        prog = argv[0]; -    } else { -        prog++; -    } +    // Output directory (default of NULL indicates "current directory"); +    outdir = NULL; +    // Check program argument count      if (argc < 2) {          printf("usage: %s [-o DIR] {[-c MAP_FILE] | FILE(s)}\n", prog);          printf(" Options:\n"); -        printf("   -c  --combine    Reconstruct original file using .map data\n"); +        printf("   -c  --combine    Reconstruct original file using .part_map data\n");          printf("   -o  --outdir     Path where output files are stored\n");          exit(1);      } + +    // Parse program arguments      size_t inputs;      for (inputs = 1; inputs < argc; inputs++) { +        // User-defined output directory          if (strcmp(argv[inputs], "-o") == 0 || strcmp(argv[inputs], "--outdir") == 0) {              inputs++;              if (access(argv[inputs], R_OK | W_OK | X_OK) != 0) { @@ -389,6 +435,7 @@ int main(int argc, char *argv[]) {              continue;          } +        // User wants to reconstruct a FITS file using a .part_map          if (strcmp(argv[inputs], "-c") == 0 || strcmp(argv[inputs], "--combine") == 0) {              if (argc < 3) {                  fprintf(stderr, "-c|--combine requires an argument (MAP file)"); @@ -396,20 +443,28 @@ int main(int argc, char *argv[]) {              }              int combine; +            const char *map_file; + +            // Shift to next argument              inputs++; -            const char *map_file = argv[inputs]; +            // Get part map filename +            map_file = argv[inputs]; + +            // Make sure it exists              if (access(map_file, F_OK) != 0) {                  fprintf(stderr, "%s: data file does not exist\n", map_file);                  exit(1);              } +            // Reconstruct FITS file              combine = combine_file(map_file, outdir);              exit(combine);          }          break;      } +    // Make sure all input files exist      bad_files = 0;      for (size_t i = inputs; i < argc; i++) {          if (access(argv[i], F_OK) != 0) { @@ -418,20 +473,24 @@ int main(int argc, char *argv[]) {          }      } +    // If not all input files exist, then die      if (bad_files) {          fprintf(stderr, "Exiting...\n");          exit(1);      } +    // Make sure user-defined output directory exists      if (outdir != NULL && access(outdir, F_OK) != 0) {          fprintf(stderr, "%s: %s\n", outdir, strerror(errno));          exit(1);      } +    // Split all input files      for (size_t i = inputs; i < argc; i++) {          split_file(argv[i], outdir);      } +    // Clean up      if (outdir != NULL) {          free(outdir);      } | 
