diff options
Diffstat (limited to 'convertdata.py')
-rwxr-xr-x | convertdata.py | 24 |
1 files changed, 24 insertions, 0 deletions
#!/usr/bin/env python3
# Used to merge old-format pickled dataset and separate log file hashes
# into the newer joined format.

import pickle
import pandas as pd

# Default locations of the old-format inputs and the merged output.
datfile = 'dataframe.dat'
hashfile = 'parsed_files.dat'
outfile = 'data.p'


def convert(datfile=datfile, hashfile=hashfile, outfile=outfile):
    """Merge the pickled dataframe and the hash list into one pickled dict.

    Args:
        datfile: Path of the pickle file read with ``pd.read_pickle``.
        hashfile: Path of the text file holding one hash per line.
        outfile: Path the merged dict is pickled to.

    Returns:
        The dict that was written, with keys ``'file_hashes'`` (list of the
        lines of *hashfile*) and ``'dataframe'`` (the object loaded from
        *datfile*).

    Raises:
        OSError: If an input cannot be read or the output cannot be written.
    """
    # Read dataframe.
    # Unpickling can execute arbitrary code; only run this on files you trust.
    frame = pd.read_pickle(datfile)

    # Read MD5 list.
    # NOTE(review): readlines() keeps the trailing newline on every entry;
    # confirm the consumer of the joined format expects that before stripping.
    with open(hashfile, 'r') as f:
        hashes = f.readlines()

    # Store both in a dict and pickle that dict.
    data = {'file_hashes': hashes,
            'dataframe': frame}

    print(data)
    print('Pickling dict...')
    # Use a context manager so the output is flushed and closed
    # deterministically instead of relying on garbage collection.
    with open(outfile, 'wb') as out:
        pickle.dump(data, out)

    return data


if __name__ == '__main__':
    convert()