aboutsummaryrefslogtreecommitdiff
path: root/convertdata.py
diff options
context:
space:
mode:
author Matt Rendina <mrendina@stsci.edu> 2019-10-10 15:17:19 -0400
committer Matt Rendina <mrendina@stsci.edu> 2019-10-10 15:17:19 -0400
commit a95ac10538f3232f23dd2f78ae75a119eb9daee7 (patch)
tree 1d6fdd79b48cfef38204acc289263d5f49c3e1a7 /convertdata.py
parent c9956f8be63786f9ed7d9994ec1f83bba2e3746c (diff)
download conmets-a95ac10538f3232f23dd2f78ae75a119eb9daee7.tar.gz
Add dataset conversion tool
Diffstat (limited to 'convertdata.py')
-rwxr-xr-x convertdata.py 24
1 files changed, 24 insertions, 0 deletions
diff --git a/convertdata.py b/convertdata.py
new file mode 100755
index 0000000..5ae8b9a
--- /dev/null
+++ b/convertdata.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+# Used to merge old-format pickled dataset and separate log file hashes
+# into the newer joined format: a single pickled dict written to 'data.p'.
+
+import pickle
+import pandas as pd
+
+datfile = 'dataframe.dat'
+hashfile = 'parsed_files.dat'
+
+# Read dataframe (old format: the frame pickled on its own).
+frame = pd.read_pickle(datfile)
+
+# Read MD5 list, one entry per line; readlines() keeps each trailing newline.
+with open(hashfile, 'r') as f:
+    hashes = f.readlines()
+
+# Store both in a dict and pickle that dict.
+data = {'file_hashes': hashes, 'dataframe': frame}
+
+print(data)
+print('Pickling dict...')
+with open('data.p', 'wb') as out:  # closed (and flushed) when the block exits
+    pickle.dump(data, out)