diff options
author | Matt Rendina <mrendina@stsci.edu> | 2019-10-10 15:17:19 -0400 |
---|---|---|
committer | Matt Rendina <mrendina@stsci.edu> | 2019-10-10 15:17:19 -0400 |
commit | a95ac10538f3232f23dd2f78ae75a119eb9daee7 (patch) | |
tree | 1d6fdd79b48cfef38204acc289263d5f49c3e1a7 /convertdata.py | |
parent | c9956f8be63786f9ed7d9994ec1f83bba2e3746c (diff) | |
download | conmets-a95ac10538f3232f23dd2f78ae75a119eb9daee7.tar.gz |
Add dataset conversion tool
Diffstat (limited to 'convertdata.py')
-rwxr-xr-x | convertdata.py | 24 |
1 files changed, 24 insertions, 0 deletions
#!/usr/bin/env python3
# Used to merge old-format pickled dataset and separate log file hashes
# into the newer joined format.
#
# (Reconstructed from the diff that added convertdata.py as a new
# executable file: mode 100755, index 0000000..5ae8b9a.)
#
# Inputs, read from the current working directory:
#   dataframe.dat     - pickled dataset, loaded with pandas.read_pickle
#   parsed_files.dat  - text file of hashes, read line by line
# Output, written to the current working directory:
#   data.p            - pickled dict with keys 'file_hashes' and 'dataframe'

import pickle
import pandas as pd

datfile = 'dataframe.dat'
hashfile = 'parsed_files.dat'
outfile = 'data.p'

# Read dataframe
# NOTE: unpickling can execute arbitrary code; only run this against a
# dataframe.dat that you trust.
frame = pd.read_pickle(datfile)

# Read MD5 list
# NOTE(review): readlines() keeps the trailing newline on every entry, so
# the stored hashes include '\n' -- confirm the consumer of data.p expects
# that before changing it.
with open(hashfile, 'r') as f:
    hashes = f.readlines()

# Store both in a dict and pickle that dict.
data = {'file_hashes': hashes,
        'dataframe': frame}

print(data)
print('Pickling dict...')
# Use a context manager so the output file is flushed and closed
# deterministically instead of relying on garbage collection of an
# anonymous file object.
with open(outfile, 'wb') as out:
    pickle.dump(data, out)