about summary refs log tree commit diff
path: root/convertdata.py
diff options
context:
space:
mode:
Diffstat (limited to 'convertdata.py')
-rwxr-xr-x convertdata.py 24
1 files changed, 24 insertions, 0 deletions
diff --git a/convertdata.py b/convertdata.py
new file mode 100755
index 0000000..5ae8b9a
--- /dev/null
+++ b/convertdata.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+# One-off migration: merge the old-format pickled dataset and the separate
+# log file hashes into the newer joined format (a single pickled dict).
+
+import pickle
+import pandas as pd
+
+datfile = 'dataframe.dat'      # input: old-format pickled DataFrame
+hashfile = 'parsed_files.dat'  # input: text file read line by line as the hash list
+
+# Read dataframe (unpickling can execute code: only use a trusted file).
+frame = pd.read_pickle(datfile)
+
+# Read MD5 list; readlines() keeps each line's trailing newline, as before.
+with open(hashfile, 'r') as f:
+    hashes = f.readlines()
+
+# Store both in a dict and pickle that dict.
+data = {'file_hashes': hashes, 'dataframe': frame}
+
+print(data)
+print('Pickling dict...')
+with open('data.p', 'wb') as out:  # closes (and flushes) data.p deterministically
+    pickle.dump(data, out)