aboutsummaryrefslogtreecommitdiff
path: root/splitfits.c
diff options
context:
space:
mode:
authorJoseph Hunkeler <jhunkeler@gmail.com>2020-06-15 17:54:19 -0400
committerJoseph Hunkeler <jhunkeler@gmail.com>2020-06-15 17:54:19 -0400
commitcd49c72d1dd9a0c6afff573ffd2ffaac73c2f0a0 (patch)
tree559ea1175909fcd9e26982a87c389333cc4e4210 /splitfits.c
parentb783486d00d3b04330e401889cdd3b08864c4ae5 (diff)
downloadsplitfits-cd49c72d1dd9a0c6afff573ffd2ffaac73c2f0a0.tar.gz
Comments
Diffstat (limited to 'splitfits.c')
-rw-r--r--splitfits.c139
1 files changed, 99 insertions, 40 deletions
diff --git a/splitfits.c b/splitfits.c
index 004634b..7a7c3c6 100644
--- a/splitfits.c
+++ b/splitfits.c
@@ -89,6 +89,10 @@ struct DataFrame {
size_t num_alloc; // Number of records allocated
};
+/**
+ * Initialize `DataFrame` structure
+ * @return `DataFrame`
+ */
struct DataFrame *dataframe_init() {
struct DataFrame *ctx;
ctx = calloc(1, sizeof(struct DataFrame));
@@ -100,6 +104,10 @@ struct DataFrame *dataframe_init() {
return ctx;
}
+/**
+ * Allocate another record for offsets in `start` and `stop` arrays
+ * @param ctx address of `DataFrame` structure
+ */
void dataframe_new(struct DataFrame **ctx) {
size_t *tmp;
tmp = realloc((*ctx)->start, ((*ctx)->num_alloc + 1) * sizeof(size_t *));
@@ -120,6 +128,12 @@ void dataframe_new(struct DataFrame **ctx) {
(*ctx)->num_inuse += 2;
}
+/**
+ * Compile a listing of start/stop offsets. Write each chunk as an individual file.
+ * @param _filename FITS file
+ * @param dest path (may be NULL)
+ * @return
+ */
int split_file(const char *_filename, const char *dest) {
FILE *fp_in;
FILE *fp_out;
@@ -132,26 +146,36 @@ int split_file(const char *_filename, const char *dest) {
size_t block_size;
size_t filepos;
int i, done;
- struct DataFrame *dataFrame, *hb;
+ struct DataFrame *dataFrame;
- bytes_read = 0;
- bytes_write = 0;
i = 0;
done = 0;
block = calloc(FITS_BLOCK, sizeof(char));
fp_in = fopen(_filename, "rb");
+ if (fp_in == NULL) {
+ perror(_filename);
+ exit(1);
+ }
mapfile = _mapfile;
- sprintf(mapfile, "%s/%s.map", dest ? dest : ".", _filename);
+ sprintf(mapfile, "%s/%s.part_map", dest ? dest : ".", _filename);
- filepos = 0;
block_size = FITS_BLOCK;
dataFrame = dataframe_init();
size_t off;
off = 0;
+
map_out = fopen(mapfile, "w+");
+ if (map_out == NULL) {
+ perror(mapfile);
+ exit(1);
+ }
+ printf("Map: %s\n", mapfile);
+
+ // Outer loop increments .partN file counter per iteration
while (!done) {
+ // Read input file and collate header start/stop offsets
while (1) {
filepos = ftell(fp_in);
bytes_read = fread(block, sizeof(char), block_size, fp_in);
@@ -167,48 +191,43 @@ int split_file(const char *_filename, const char *dest) {
if (is_header_end(block)) {
filepos = ftell(fp_in);
dataFrame->stop[dataFrame->num_inuse] = filepos;
+ // Move on to next part
break;
}
}
- off += 2;
+ // Allocate a new record when we are NOT DONE reading the input file
if (!done) {
- dataframe_new(&dataFrame);
+ dataframe_new(&dataFrame); // allocates TWO records per call
}
}
- size_t last;
+ // Fill in ODD record gaps (EVEN records are header data) with data between headers
for (off = 0; off < dataFrame->num_inuse; off += 2) {
+ // Ignore first pair because it's guaranteed to be a header
if (off < 2) {
continue;
}
- size_t size;
- size = dataFrame->start[off] - dataFrame->stop[off - 2];
- last = off - 1;
-
- dataFrame->start[last] = dataFrame->stop[off - 2];
- dataFrame->stop[last] = dataFrame->start[off];
+ // Assign ODD offsets
+ dataFrame->start[off - 1] = dataFrame->stop[off - 2];
+ dataFrame->stop[off - 1] = dataFrame->start[off];
}
+ // Assign final offset leading up to the end of the file
dataFrame->start[off - 1] = dataFrame->stop[off - 2];
fseek(fp_in, 0L, SEEK_END);
dataFrame->stop[off - 1] = ftell(fp_in);
- printf("info:\n");
- for (size_t d = 0; d < dataFrame->num_inuse; d++) {
- printf("%zu: start: %zu, stop: %zu\n", d, dataFrame->start[d], dataFrame->stop[d]);
- }
-
- done = 0;
+ // Reuse input file handle
rewind(fp_in);
- size_t data_size;
- data_size = 0;
+ // Read each offset range from the input file and write it to its respective .part_N file
for (off = 0; off < dataFrame->num_inuse; off++) {
char path[PATH_MAX];
char filename[PATH_MAX];
char *ext;
+ // Get dirname of input path
strcpy(path, _filename);
if (dest != NULL) {
strcpy(path, dest);
@@ -216,35 +235,51 @@ int split_file(const char *_filename, const char *dest) {
get_dirname(path);
}
+ // Get basename of input file
strcpy(filename, _filename);
get_basename(filename);
+ // When the basename and dirname are the same, use the current working directory path
if (strcmp(path, filename) == 0) {
strcpy(path, ".");
}
+ // Create output file name
sprintf(outfile, "%s/%s", path, filename);
+
+ // Strip file extension from output file
if ((ext = strrchr(outfile, '.')) == NULL) {
fprintf(stderr, "%s: does not have an extension\n", outfile);
} else {
*ext = '\0';
}
+ // When headers physically border one another, this can happen
if (dataFrame->start[off] == dataFrame->stop[off]) {
printf("skipped %d: identical begin/end offset\n", i);
i++;
continue;
}
+ // Finalize output file name
sprintf(outfile + strlen(outfile), ".part_%d", i);
- printf("creating %s\n", outfile);
+
+ printf("Creating: %s\n", outfile);
fp_out = fopen(outfile, "w+b");
+ if (fp_out == NULL) {
+ perror(outfile);
+ exit(1);
+ }
- filepos = ftell(fp_in);
block_size = FITS_BLOCK;
- fprintf(map_out, "%zu:", filepos);
+ // Seek to first offset (probably zero)
fseek(fp_in, dataFrame->start[off], SEEK_SET);
+ filepos = ftell(fp_in);
+ // Write start offset to map
+ fprintf(map_out, "%zu:", filepos);
+
+ // Read (input) / write (part) for each offset in the data frame
while(1) {
filepos = ftell(fp_in);
@@ -254,7 +289,6 @@ int split_file(const char *_filename, const char *dest) {
bytes_read = fread(block, sizeof(char), block_size, fp_in);
if (bytes_read < 1) {
- done = 1;
break;
}
@@ -267,18 +301,29 @@ int split_file(const char *_filename, const char *dest) {
fclose(fp_out);
- char *bname = get_basename(outfile);
+ // Record output file offset and basename in the map
+ char *bname;
+ bname = get_basename(outfile);
fprintf(map_out, "%zu:%s\n", filepos, bname);
+
+ // Next part
i++;
}
fclose(map_out);
return 0;
}
+/**
+ * Reconstruct a file using a .part_map file
+ * @param _filename path to .part_map file
+ * @param dest path to store reconstructed file (may be NULL)
+ * @return
+ */
int combine_file(const char *_filename, const char *dest) {
- size_t bytes;
char buffer[PATH_MAX];
+ char path[PATH_MAX];
char outfile[PATH_MAX];
+ char *filename;
char *block;
char *ext;
FILE *fp_in;
@@ -292,9 +337,6 @@ int combine_file(const char *_filename, const char *dest) {
exit(1);
}
- char *filename;
- char path[PATH_MAX];
-
filename = calloc(PATH_MAX, sizeof(char));
strcpy(filename, _filename);
@@ -325,18 +367,21 @@ int combine_file(const char *_filename, const char *dest) {
char *name;
FILE *fp_tmp;
+ // Get .part_N file name
mark = strrchr(buffer, ':');
if (mark != NULL) {
mark++;
name = strdup(mark);
}
+ // Open .part_N for reading
fp_tmp = fopen(name, "r");
if (fp_tmp == NULL) {
perror(name);
exit(1);
}
+ // Append .part_N data to the output file sequentially
printf("Reading: %s\n", name);
while (1) {
if (fread(block, sizeof(char), FITS_BLOCK, fp_tmp) < 1) {
@@ -361,25 +406,26 @@ int main(int argc, char *argv[]) {
char *prog;
char *outdir;
- prog = strrchr(argv[0], '/');
- outdir = NULL;
+ // Set program name
+ prog = get_basename(argv[0]);
- if (prog == NULL) {
- prog = argv[0];
- } else {
- prog++;
- }
+ // Output directory (default of NULL indicates "current directory")
+ outdir = NULL;
+ // Check program argument count
if (argc < 2) {
printf("usage: %s [-o DIR] {[-c MAP_FILE] | FILE(s)}\n", prog);
printf(" Options:\n");
- printf(" -c --combine Reconstruct original file using .map data\n");
+ printf(" -c --combine Reconstruct original file using .part_map data\n");
printf(" -o --outdir Path where output files are stored\n");
exit(1);
}
+
+ // Parse program arguments
size_t inputs;
for (inputs = 1; inputs < argc; inputs++) {
+ // User-defined output directory
if (strcmp(argv[inputs], "-o") == 0 || strcmp(argv[inputs], "--outdir") == 0) {
inputs++;
if (access(argv[inputs], R_OK | W_OK | X_OK) != 0) {
@@ -389,6 +435,7 @@ int main(int argc, char *argv[]) {
continue;
}
+ // User wants to reconstruct a FITS file using a .part_map
if (strcmp(argv[inputs], "-c") == 0 || strcmp(argv[inputs], "--combine") == 0) {
if (argc < 3) {
fprintf(stderr, "-c|--combine requires an argument (MAP file)");
@@ -396,20 +443,28 @@ int main(int argc, char *argv[]) {
}
int combine;
+ const char *map_file;
+
+ // Shift to next argument
inputs++;
- const char *map_file = argv[inputs];
+ // Get part map filename
+ map_file = argv[inputs];
+
+ // Make sure it exists
if (access(map_file, F_OK) != 0) {
fprintf(stderr, "%s: data file does not exist\n", map_file);
exit(1);
}
+ // Reconstruct FITS file
combine = combine_file(map_file, outdir);
exit(combine);
}
break;
}
+ // Make sure all input files exist
bad_files = 0;
for (size_t i = inputs; i < argc; i++) {
if (access(argv[i], F_OK) != 0) {
@@ -418,20 +473,24 @@ int main(int argc, char *argv[]) {
}
}
+ // If not all input files exist, then die
if (bad_files) {
fprintf(stderr, "Exiting...\n");
exit(1);
}
+ // Make sure user-defined output directory exists
if (outdir != NULL && access(outdir, F_OK) != 0) {
fprintf(stderr, "%s: %s\n", outdir, strerror(errno));
exit(1);
}
+ // Split all input files
for (size_t i = inputs; i < argc; i++) {
split_file(argv[i], outdir);
}
+ // Clean up
if (outdir != NULL) {
free(outdir);
}