diff options
author | Joseph Hunkeler <jhunkeler@gmail.com> | 2020-06-15 12:34:07 -0400 |
---|---|---|
committer | Joseph Hunkeler <jhunkeler@gmail.com> | 2020-06-15 12:34:07 -0400 |
commit | 4a133b10d1e2870b789d18607f976a2a473726a1 (patch) | |
tree | 68de1a378d93241dacf441a5c6521a990d83970f /splitfits.c | |
parent | f612ed349ccc463b83a60b06a1d6574e5760eda0 (diff) | |
download | splitfits-4a133b10d1e2870b789d18607f976a2a473726a1.tar.gz |
Rewrote POC
Diffstat (limited to 'splitfits.c')
-rw-r--r-- | splitfits.c | 465 |
1 files changed, 287 insertions, 178 deletions
diff --git a/splitfits.c b/splitfits.c index f53f0ef..9b6612a 100644 --- a/splitfits.c +++ b/splitfits.c @@ -3,221 +3,339 @@ #include <string.h> #include <limits.h> #include <unistd.h> +#include <errno.h> -#define SPLITFITS_BLOCK 2880 +#define FITS_BLOCK 2880 +#define FITS_RECORD 80 -char *SPLITFITS_OUTDIR; -struct SplitFITS { - FILE *handle; - char *path_origin; - char *path_header; - char *path_data; - size_t header_size; - size_t data_size; -}; - -int splitfits_header_write(struct SplitFITS **ctx) { - char *block; - FILE *fp; - size_t count = 1; - size_t bytes; - size_t written; - - fp = fopen((*ctx)->path_header, "w+b"); - if (fp == NULL) { - perror((*ctx)->path_header); - return 1; - } - - block = calloc(SPLITFITS_BLOCK + 1, sizeof(char)); - - written = 0; - bytes = fread(block, sizeof(char), SPLITFITS_BLOCK, (*ctx)->handle); - while (bytes > 0) { - written = fwrite(block, sizeof(char), SPLITFITS_BLOCK, fp); - if (written < 1) { - perror("short write"); +int has_key(char *block, const char *key) { + for (size_t i = 0; i < FITS_BLOCK; i += FITS_RECORD) { + char record[FITS_RECORD]; + memcpy(record, block + i, FITS_RECORD - 1); + record[FITS_RECORD - 1] = '\0'; + if (strncmp(record, key, strlen(key)) == 0) { return 1; } - - if (strstr(block, "END ") != NULL) { - break; - } - - bytes = fread(block, sizeof(char), SPLITFITS_BLOCK, (*ctx)->handle); - count++; } - - (*ctx)->header_size = SPLITFITS_BLOCK * (count); - - fflush(fp); - fclose(fp); - free(block); return 0; } -int splitfits_data_write(struct SplitFITS **ctx) { - char *block; - FILE *fp; - fp = fopen((*ctx)->path_data, "w+b"); - - size_t size = 0; - fseek((*ctx)->handle, 0, SEEK_END); - size = ftell((*ctx)->handle) - (*ctx)->header_size; - fseek((*ctx)->handle, (*ctx)->header_size, SEEK_SET); - - block = calloc(SPLITFITS_BLOCK + 1, sizeof(char)); - - while (fread(block, sizeof(char), SPLITFITS_BLOCK, (*ctx)->handle) > 0) { - if (fwrite(block, sizeof(char), SPLITFITS_BLOCK, fp) < 1) { - perror("short write"); - return 1; - } +int is_header_start(char *block) { + if (has_key(block, "SIMPLE") || has_key(block, "XTENSION")) { + return 1; } - - (*ctx)->data_size = size; - - fclose(fp); - free(block); return 0; } -int splitfits_split(struct SplitFITS **ctx) { - if (splitfits_header_write(ctx) != 0 || splitfits_data_write(ctx) != 0) { +int is_header_end(char *block) { + if (has_key(block, "END") && block[FITS_BLOCK - 1] == ' ') { return 1; } return 0; } -void splitfits_free(struct SplitFITS *ctx) { - free(ctx->path_origin); - free(ctx->path_data); - free(ctx->path_header); - fclose(ctx->handle); - free(ctx); +char *get_basename(char *path) { + char *sep; + if ((sep = strrchr(path, '/')) == NULL) { + return path; + } + sep++; + path = sep; + return path; } -void splitfits_show(struct SplitFITS *ctx) { - printf("%s:\n", ctx->path_origin); - printf("\t%-20s (%zu bytes)\n", ctx->path_header, ctx->header_size); - printf("\t%-20s (%zu bytes)\n", ctx->path_data, ctx->data_size); +char *get_dirname(char *path) { + char *sep; + if ((sep = strrchr(path, '/')) == NULL) { + return path; + } + *sep = '\0'; + return path; } -int splitfits_combine(const char *headerfile, const char *datafile) { - FILE *handle[] = { - fopen(headerfile, "rb"), - fopen(datafile, "rb"), - }; - FILE *fp; - size_t handle_count; - char *suffix; - char tempfile[PATH_MAX]; - char outfile[PATH_MAX]; +struct HeaderBlock { + size_t *start; + size_t *stop; + size_t num_inuse; + size_t num_alloc; +}; - handle_count = sizeof(handle) / sizeof(FILE *); - sprintf(tempfile, "%s.tmp_XXXXXX", __FUNCTION__); +struct HeaderBlock *headerblock_init() { + struct HeaderBlock *ctx; + ctx = calloc(1, sizeof(struct HeaderBlock)); + ctx->num_inuse = 0; + ctx->num_alloc = 2; - if (mkstemp(tempfile) < 0) { - perror(tempfile); - return 1; + ctx->start = calloc(ctx->num_alloc, sizeof(size_t)); + ctx->stop = calloc(ctx->num_alloc, sizeof(size_t)); + return ctx; +} + +void headerblock_new(struct HeaderBlock **ctx) { + size_t *tmp; + tmp = realloc((*ctx)->start, ((*ctx)->num_alloc + 1) * sizeof(size_t *)); + if (tmp == NULL) { + perror("realloc start"); + exit(1); } + (*ctx)->start = tmp; - fp = fopen(tempfile, "w+b"); - if (fp == NULL) { - perror("could not open temporary file for writing"); - return 1; + tmp = realloc((*ctx)->stop, ((*ctx)->num_alloc + 1) * sizeof(size_t *)); + if (tmp == NULL) { + perror("realloc stop"); + exit(1); } + (*ctx)->stop = tmp; - char block[2] = {0, 0}; - size_t bytes = 0; - for (size_t i = 0; i < handle_count; i++) { + (*ctx)->num_alloc += 2; + (*ctx)->num_inuse += 2; +} + +int split_file(const char *_filename, const char *dest) { + FILE *fp_in; + FILE *fp_out; + FILE *map_out; + char outfile[PATH_MAX]; + char _mapfile[PATH_MAX]; + char *mapfile; + char *block; + size_t bytes_read, bytes_write, bytes_total; + size_t block_size; + int i, done, header_block, data_block; + struct HeaderBlock *headerBlock, *hb; + + + bytes_read = 0; + bytes_write = 0; + bytes_total = 0; + i = 0; + done = 0; + block = calloc(FITS_BLOCK, sizeof(char)); + fp_in = fopen(_filename, "rb"); + + mapfile = _mapfile; + sprintf(mapfile, "%s/%s.map", dest ? dest : ".", _filename); + + size_t filepos; + header_block = 0; + data_block = 0; + filepos = 0; + block_size = FITS_BLOCK; + headerBlock = headerblock_init(); + + size_t off; + off = 0; + map_out = fopen(mapfile, "w+"); + while (!done) { while (1) { - bytes = fread(block, sizeof(char), 1, handle[i]); - if (bytes) { - fwrite(block, sizeof(char), 1, fp); - } else { + filepos = ftell(fp_in); + bytes_read = fread(block, sizeof(char), block_size, fp_in); + if (bytes_read < 1) { + done = 1; + break; + } + + if (is_header_start(block)) { + headerBlock->start[headerBlock->num_inuse] = filepos; + } + + if (is_header_end(block)) { + filepos = ftell(fp_in); + headerBlock->stop[headerBlock->num_inuse] = filepos; break; } } - fclose(handle[i]); - } - rewind(fp); + off += 2; - strcpy(outfile, headerfile); - suffix = strstr(outfile, "_hdr.txt"); - if (suffix == NULL) { - fprintf(stderr, "%s: does not have the correct suffix (_hdr.txt)\n", outfile); - return 1; + if (!done) { + headerblock_new(&headerBlock); + } } - *suffix = '\0'; - strcat(suffix, ".fits"); + size_t last; + for (off = 0; off < headerBlock->num_inuse; off += 2) { + if (off < 2) { + continue; + } - printf("Writing: %s\n", outfile); + size_t size; + size = headerBlock->start[off] - headerBlock->stop[off - 2]; + last = off - 1; - FILE *ofp; - ofp = fopen(outfile, "w+b"); - if (ofp == NULL) { - perror(outfile); - return 1; + headerBlock->start[last] = headerBlock->stop[off - 2]; + headerBlock->stop[last] = headerBlock->start[off]; } + headerBlock->start[off - 1] = headerBlock->stop[off - 2]; + fseek(fp_in, 0L, SEEK_END); + headerBlock->stop[off - 1] = ftell(fp_in); - while (fread(block, sizeof(char), 1, fp) != 0) { - fwrite(block, sizeof(char), 1, ofp); + printf("info:\n"); + for (size_t d = 0; d < headerBlock->num_inuse; d++) { + printf("%zu: start: %zu, stop: %zu\n", d, headerBlock->start[d], headerBlock->stop[d]); } - fclose(ofp); - fclose(fp); - remove(tempfile); + done = 0; + rewind(fp_in); + + size_t data_size; + data_size = 0; + for (off = 0; off < headerBlock->num_inuse; off++) { + char path[PATH_MAX]; + char filename[PATH_MAX]; + char *ext; + + strcpy(path, _filename); + if (dest != NULL) { + strcpy(path, dest); + } else { + get_dirname(path); + } + + strcpy(filename, _filename); + get_basename(filename); + + if (strcmp(path, filename) == 0) { + strcpy(path, "."); + } + + sprintf(outfile, "%s/%s", path, filename); + if ((ext = strrchr(outfile, '.')) == NULL) { + fprintf(stderr, "%s: does not have an extension\n", outfile); + } else { + *ext = '\0'; + } + + if (headerBlock->start[off] == headerBlock->stop[off]) { + printf("skipped %d: identical begin/end offset\n", i); + i++; + continue; + } + + sprintf(outfile + strlen(outfile), ".part_%d", i); + printf("creating %s\n", outfile); + fp_out = fopen(outfile, "w+b"); + + filepos = ftell(fp_in); + block_size = FITS_BLOCK; + + fprintf(map_out, "%zu:", filepos); + fseek(fp_in, headerBlock->start[off], SEEK_SET); + while(1) { + filepos = ftell(fp_in); + + if (filepos == headerBlock->stop[off]) { + break; + } + + bytes_read = fread(block, sizeof(char), block_size, fp_in); + if (bytes_read < 1) { + done = 1; + break; + } + + bytes_write = fwrite(block, sizeof(char), block_size, fp_out); + if (bytes_write < 1) { + perror("write failure"); + exit(1); + } + } + + fclose(fp_out); + fprintf(map_out, "%zu:%s\n", filepos, outfile); + i++; + } + fclose(map_out); return 0; } -struct SplitFITS *splitfits(const char *_filename) { - struct SplitFITS *ctx; - char *filename; +int combine_file(const char *_filename, const char *dest) { + size_t bytes; + char buffer[PATH_MAX]; + char outfile[PATH_MAX]; + char *block; char *ext; + FILE *fp_in; + FILE *fp_out; - ctx = calloc(1, sizeof(struct SplitFITS)); - if (ctx == NULL) { - perror("calloc"); - exit(1); - } + block = calloc(FITS_BLOCK, sizeof(char)); - ctx->handle = fopen(_filename, "r+b"); - if (ctx->handle == NULL) { + fp_in = fopen(_filename, "r"); + if (fp_in == NULL) { perror(_filename); exit(1); } - ctx->path_origin = strdup(_filename); - filename = strdup(_filename); + char *filename; + char path[PATH_MAX]; + + filename = calloc(PATH_MAX, sizeof(char)); - ctx->path_header = calloc(PATH_MAX, sizeof(char)); - ext = strrchr(filename, '.'); + strcpy(filename, _filename); + filename = get_basename(filename); - if (ext == NULL) { - fprintf(stderr, "%s: has no file extension\n", ctx->path_origin); + if (dest == NULL) { + strcpy(path, "."); } else { + strcpy(path, dest); + } + + sprintf(outfile, "%s/%s", path, filename); + + ext = strrchr(outfile, '.'); + if (ext != NULL) { *ext = '\0'; } - strcpy(ctx->path_header, filename); - strcat(ctx->path_header, "_hdr.txt"); + fp_out = fopen(outfile, "w+b"); + if (fp_out == NULL) { + perror(outfile); + exit(1); + } - ctx->path_data = calloc(PATH_MAX, sizeof(char)); - strcpy(ctx->path_data, filename); - strcat(ctx->path_data, "_data.bin"); + printf("Writing: %s\n", outfile); + while (fscanf(fp_in, "%s\n", buffer) > 0) { + char *mark; + char *name; + FILE *fp_tmp; + + mark = strrchr(buffer, ':'); + if (mark != NULL) { + mark++; + name = strdup(mark); + } - free(filename); - return ctx; + fp_tmp = fopen(name, "r"); + if (fp_tmp == NULL) { + perror(name); + exit(1); + } + + printf("Reading: %s\n", name); + while (1) { + if (fread(block, sizeof(char), FITS_BLOCK, fp_tmp) < 1) { + break; + } + + if (fwrite(block, sizeof(char), FITS_BLOCK, fp_out) < 1) { + perror("write failure"); + break; + } + } + fclose(fp_tmp); + } + fclose(fp_in); + fclose(fp_out); + free(block); + return 0; } int main(int argc, char *argv[]) { int bad_files; char *prog; + char *outdir; - SPLITFITS_OUTDIR = NULL; // global prog = strrchr(argv[0], '/'); + outdir = NULL; if (prog == NULL) { prog = argv[0]; @@ -226,9 +344,9 @@ int main(int argc, char *argv[]) { } if (argc < 2) { - printf("usage: %s [-o DIR] {[-c HEADER_FILE DATA_FILE] | FILE(s)}\n", prog); + printf("usage: %s [-o DIR] {[-c MAP_FILE] | FILE(s)}\n", prog); printf(" Options:\n"); - printf(" -c --combine Reconstruct original file with _{hdr.txt,data.bin} files\n"); + printf(" -c --combine Reconstruct original file using .map data\n"); printf(" -o --outdir Path where output files are stored\n"); exit(1); } @@ -240,32 +358,26 @@ int main(int argc, char *argv[]) { if (access(argv[inputs], R_OK | W_OK | X_OK) != 0) { fprintf(stderr, "%s: output directory does not exist or is not writable\n", argv[inputs]); } - SPLITFITS_OUTDIR = strdup(argv[inputs]); + outdir = strdup(argv[inputs]); + continue; } if (strcmp(argv[inputs], "-c") == 0 || strcmp(argv[inputs], "--combine") == 0) { - if (argc < 4) { - fprintf(stderr, "-c|--combine requires two arguments (HEADER FILE and DATA_FILE)"); + if (argc < 3) { + fprintf(stderr, "-c|--combine requires an argument (MAP file)"); exit(1); } int combine; inputs++; - const char *header_file = argv[inputs]; - inputs++; - const char *data_file = argv[inputs]; - - if (access(header_file, F_OK) != 0) { - fprintf(stderr, "%s: header file does not exist\n", header_file); - exit(1); - } + const char *map_file = argv[inputs]; - if (access(data_file, F_OK) != 0) { - fprintf(stderr, "%s: data file does not exist\n", header_file); + if (access(map_file, F_OK) != 0) { + fprintf(stderr, "%s: data file does not exist\n", map_file); exit(1); } - combine = splitfits_combine(header_file, data_file); + combine = combine_file(map_file, outdir); exit(combine); } break; @@ -274,7 +386,7 @@ int main(int argc, char *argv[]) { bad_files = 0; for (size_t i = inputs; i < argc; i++) { if (access(argv[i], F_OK) != 0) { - fprintf(stderr, "%s: does not exist", argv[i]); + fprintf(stderr, "%s: does not exist\n", argv[i]); bad_files = 1; } } @@ -284,20 +396,17 @@ int main(int argc, char *argv[]) { exit(1); } - for (size_t i = 1; i < argc; i++) { - struct SplitFITS *fits; - fits = splitfits(argv[i]); - - if (splitfits_split(&fits) != 0) { - fprintf(stderr, "%s: split failed\n", fits->path_origin); - } + if (outdir != NULL && access(outdir, F_OK) != 0) { + fprintf(stderr, "%s: %s\n", outdir, strerror(errno)); + exit(1); + } - splitfits_show(fits); - splitfits_free(fits); + for (size_t i = inputs; i < argc; i++) { + split_file(argv[i], outdir); } - if (SPLITFITS_OUTDIR != NULL) { - free(SPLITFITS_OUTDIR); + if (outdir != NULL) { + free(outdir); } return 0; } |