'''
scilo - A scientific workflow and efficiency library
Copyright (C) 2012  Joseph Hunkeler

This file is part of scilo.

scilo is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

scilo is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with scilo.  If not, see <http://www.gnu.org/licenses/>.
'''
import glob
import os
import sqlite3
import sys

import numpy as np


class scilo(object):
    '''A dataset directory with 'data', 'result' and 'npy' subdirectories.

    Text files in 'data' are mirrored as numpy ``.npy`` files in 'npy'.
    A small SQLite database ('npy/npy_mtime.db') records the modification
    time of every data file so stale cache entries can be rebuilt or
    dropped.
    '''

    def __init__(self, path):
        '''Open the dataset rooted at 'path', creating it when necessary.

        Missing subdirectories are created even when the dataset directory
        itself already exists, so later cache operations can rely on them.
        '''
        self.database_mtime = None
        self.database_mtime_cursor = None
        self.path = os.path.abspath(path)
        self.subdirs = {}
        for name in ('data', 'result', 'npy'):
            self.subdirs[name] = os.path.join(self.path, name)

        if os.path.exists(self.path):
            print("Dataset '%s'... found" % (path))
        else:
            print("Dataset '%s'... not found" % (path))
            print("Generating structure...")
            os.makedirs(self.path)

        for name in sorted(self.subdirs):
            directory = self.subdirs[name]
            if not os.path.isdir(directory):
                print("Creating directory: '%s'" % (directory))
                os.mkdir(directory)

    def __getitem__(self, key):
        '''Return the absolute path of subdirectory 'key'.'''
        return self.subdirs[key]

    def close(self):
        '''Close the modification tracking database, if it is open.'''
        if self.database_mtime is not None:
            self.database_mtime.close()
            self.database_mtime = None
            self.database_mtime_cursor = None

    def aggregate(self, globular):
        '''Move every file matching the glob pattern into the data directory.

        Returns False when the pattern matches nothing, True otherwise.
        Files that cannot be moved are reported and skipped.
        '''
        sources = glob.glob(globular)
        if not sources:
            return False

        for src in sources:
            src = os.path.abspath(src)
            dest = os.path.join(self.subdirs['data'], os.path.basename(src))
            try:
                # os.rename signals failure by raising, not by returning False
                os.rename(src, dest)
            except OSError as err:
                print("Unable to move '%s': %s" % (src, err))
        return True

    def _npy_mtime_populate(self):
        '''Open the mtime database, creating and seeding it on first use.

        A single connection is shared by every helper so that statements
        and commits always act on the same transaction.
        '''
        if self.database_mtime is not None:
            # Already open; do not leak another connection
            return

        database = os.path.join(self.subdirs['npy'], 'npy_mtime.db')
        is_new = not os.path.exists(database)
        if is_new:
            print("Creating modification tracking database...")

        connection = sqlite3.connect(database)
        self.database_mtime = connection
        self.database_mtime_cursor = connection.cursor()
        # Also repairs a database file that exists but has no table yet
        self.database_mtime_cursor.execute(
            "CREATE TABLE IF NOT EXISTS npy(file, mtime)")
        connection.commit()

        if is_new:
            pattern = os.path.join(self.subdirs['data'], "*.*")
            for f in glob.glob(pattern):
                mtime = os.path.getmtime(f)
                print("\tFile: %s\tmtime: %f" % (os.path.basename(f), mtime))
                self._npy_mtime_insert(f, mtime)
        return

    def _npy_mtime_insert(self, path, mtime):
        '''Record 'path' with modification time 'mtime'.'''
        self.database_mtime_cursor.execute(
            "INSERT INTO npy VALUES (?,?)", (path, mtime))
        self.database_mtime.commit()

    def _npy_mtime_update(self, path, stored, current):
        '''Replace the 'stored' mtime of 'path' with 'current'.'''
        values = (path, current, path, stored)
        self.database_mtime_cursor.execute(
            "UPDATE npy SET file=?, mtime=? WHERE file==? AND mtime==?",
            values)
        self.database_mtime.commit()
        print("'%s' updated mtime: %f" % (path, current))

    def _npy_mtime_delete(self, path):
        '''Forget 'path' in the mtime database.'''
        self.database_mtime_cursor.execute(
            "DELETE FROM npy WHERE file==?", (path,))
        self.database_mtime.commit()
        print("'%s' removed from mtime database" % path)
        return

    def _npy_mtime_check(self):
        '''Synchronise the numpy cache with the files in the data directory.

        * tracked file no longer exists -> forget it and drop its cache
        * tracked file changed on disk  -> store new mtime, rebuild cache
        * data file not tracked yet     -> start tracking it
        '''
        c = self.database_mtime_cursor
        c.execute("SELECT file, mtime FROM npy")
        stored = [(str(f), mtime) for f, mtime in c.fetchall()]
        tracked = set()

        for stored_file, stored_mtime in stored:
            tracked.add(stored_file)
            if not os.path.exists(stored_file):
                print("Missing data file: '%s'" % stored_file)
                self._npy_mtime_delete(stored_file)
                self.npy_cache_drop(stored_file)
                continue

            current_mtime = os.path.getmtime(stored_file)
            if current_mtime != stored_mtime:
                print("'%s' differs" % stored_file)
                self._npy_mtime_update(stored_file, stored_mtime,
                                       current_mtime)
                print("Rebuilding numpy cache for '%s'" % stored_file)
                if not self.npy_cache_build(stored_file):
                    print("Unable to rebuild numpy cache for '%s'"
                          % stored_file)

        # Files that appeared after the database was created must be
        # tracked too, otherwise later edits to them would go unnoticed.
        pattern = os.path.join(self.subdirs['data'], '*.*')
        for f in glob.glob(pattern):
            if f not in tracked:
                self._npy_mtime_insert(f, os.path.getmtime(f))
        return

    def npy_cache_build(self, path):
        ''' Generate 'path' npy file in npy directory

        Returns True on success and False when 'path' cannot be read or
        parsed, or the cache file cannot be written.
        '''
        target = os.path.join(self.subdirs['npy'], os.path.basename(path))
        try:
            # numpy appends '.npy' to 'target'; both calls raise on failure
            np.save(target, np.loadtxt(path))
        except (IOError, OSError, ValueError):
            return False
        return True

    def npy_cache_drop(self, path):
        ''' Remove the npy file belonging to 'path' from npy directory

        'path' may be a data file or the cache file itself.
        '''
        # For security reasons only the base name of 'path' is used, so
        # nothing outside of the 'npy' directory can ever be unlinked.
        name = os.path.basename(path)
        if not name.endswith('.npy'):
            name += '.npy'
        target = os.path.join(self.subdirs['npy'], name)
        if os.path.isfile(target):
            print("Unlinking '%s'" % (target))
            os.unlink(target)

    def npy_cache_drop_all(self):
        ''' Remove all npy files '''
        for f in glob.glob(os.path.join(self.subdirs['npy'], '*.npy')):
            os.unlink(f)

    def npy_cache_populate(self):
        '''Build missing cache files, then refresh stale ones.'''
        files = glob.glob(os.path.join(self.subdirs['data'], '*.*'))
        # Work out what is missing first so "N of M" is stable
        pending = []
        for f in files:
            cached = os.path.join(self.subdirs['npy'],
                                  os.path.basename(f) + '.npy')
            if not os.path.exists(cached):
                pending.append(f)

        file_total = len(pending)
        for file_current, f in enumerate(pending, 1):
            sys.stdout.write("Building cache %d of %d: '%s'... "
                             % (file_current, file_total,
                                os.path.basename(f)))
            sys.stdout.flush()
            if self.npy_cache_build(f):
                print("success")
            else:
                print("failure")

        self._npy_mtime_populate()
        self._npy_mtime_check()


if __name__ == "__main__":
    pass