'''
scilo - A scientific workflow and efficiency library
Copyright (C) 2012 Joseph Hunkeler

This file is part of scilo.

scilo is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

scilo is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with scilo.  If not, see <http://www.gnu.org/licenses/>.
'''
# Provenance: reconstructed from the whitespace-collapsed patch of commit
# 9bd908ccb75d145a134e10b5320585e28703275d ("Initial commit", Joseph Hunkeler,
# Mon, 14 Jan 2013 17:23:29 -0500), which created scilo/__init__.py and
# scilo/scilo.py.  This module is scilo/scilo.py.  scilo/__init__.py carries
# the same license header followed by the single statement:
#
#     from scilo import scilo
#
# NOTE: output that must stay on one line is written with sys.stdout.write()
# instead of the Python 2 only ``print(...),`` form, so the module behaves the
# same under Python 2 and Python 3.
import glob
import os
import shutil
import sqlite3
import sys

import numpy as np


class scilo(object):
    '''
    A dataset directory with 'data', 'result' and 'npy' sub-directories.

    Text files in 'data' are converted with numpy.loadtxt() and cached as
    .npy files in 'npy'.  A sqlite database ('npy_mtime.db', stored in the
    'npy' directory) records the modification time of every data file so
    that changed files are re-cached and removed files are purged.

    Sub-directory paths are available by key, e.g. ``dataset['data']``.
    '''

    def __init__(self, path):
        '''
        Open the dataset rooted at 'path', creating it if necessary.

        The root directory is created when it does not exist.  Any of the
        'data', 'result' and 'npy' sub-directories that are missing are
        created as well, including when the root was already present.

        Raises OSError if a directory cannot be created.
        '''
        self.database_mtime = None
        self.database_mtime_cursor = None
        self.path = os.path.abspath(path)
        self.subdirs = {
            'data': False,
            'result': False,
            'npy': False,
        }

        sys.stdout.write("Dataset '%s'... " % (path))
        if os.path.exists(self.path):
            print("found")
        else:
            print("not found")
            os.mkdir(self.path)
            print("Generating structure...")

        # list() takes a snapshot of the keys; the values are replaced below.
        for key in list(self.subdirs):
            d = os.path.join(self.path, key)
            self.subdirs[key] = d
            if not os.path.isdir(d):
                print("Creating directory: '%s'" % (d))
                os.mkdir(d)

    def __getitem__(self, key):
        ''' Return the absolute path of sub-directory 'key' (KeyError if unknown) '''
        return self.subdirs[key]

    def close(self):
        ''' Close the modification tracking database if it is open '''
        if self.database_mtime is not None:
            self.database_mtime.close()
        self.database_mtime = None
        self.database_mtime_cursor = None

    def aggregate(self, globular):
        '''
        Move every path matching the glob pattern 'globular' into 'data'.

        Returns False when the pattern matches nothing, True otherwise.
        A source that cannot be moved is reported on stderr and skipped.
        '''
        sources = glob.glob(globular)
        if not sources:
            return False
        for src in sources:
            src = os.path.abspath(src)
            dest = os.path.join(self.subdirs['data'], os.path.basename(src))
            try:
                # shutil.move falls back to copy+delete when 'src' and
                # 'dest' live on different filesystems, where a plain
                # os.rename would fail.
                shutil.move(src, dest)
            except (IOError, OSError, shutil.Error) as err:
                sys.stderr.write("Unable to move '%s': %s\n" % (src, err))
                continue

        return True

    def _npy_cache_path(self, path):
        '''
        Return the cache file in 'npy' that corresponds to 'path'.

        Only the base name of 'path' is used, so the result is always
        located directly inside the 'npy' directory.  '.npy' is appended
        unless the name already ends with it, which mirrors the file name
        numpy.save() writes to.
        '''
        name = os.path.basename(path)
        if not name.endswith('.npy'):
            name += '.npy'
        return os.path.join(self.subdirs['npy'], name)

    def _npy_mtime_populate(self):
        '''
        Open the modification tracking database, creating it when absent.

        A newly created database is seeded with the current mtime of every
        file matching '*.*' in 'data'.  Calling this again while a
        connection is already open does nothing.
        '''
        if self.database_mtime is not None:
            return

        database = os.path.join(self.subdirs['npy'], 'npy_mtime.db')
        is_new = not os.path.exists(database)
        if is_new:
            print("Creating modification tracking database...")

        # A single connection backs both attributes; _npy_mtime_insert and
        # friends execute on the cursor and commit on the connection.
        connection = sqlite3.connect(database)
        self.database_mtime = connection
        self.database_mtime_cursor = connection.cursor()
        self.database_mtime_cursor.execute(
            "CREATE TABLE IF NOT EXISTS npy(file, mtime)")

        if is_new:
            for f in glob.glob(os.path.join(self.subdirs['data'], "*.*")):
                mtime = os.path.getmtime(f)
                print("\tFile: %s\tmtime: %f" % (os.path.basename(f), mtime))
                self._npy_mtime_insert(f, mtime)
        connection.commit()
        return

    def _npy_mtime_insert(self, path, mtime):
        ''' Record 'path' with modification time 'mtime' '''
        connection = self.database_mtime
        c = self.database_mtime_cursor
        values = (path, mtime,)
        c.execute("INSERT INTO npy VALUES (?,?)", values)
        connection.commit()

    def _npy_mtime_update(self, path, stored, current):
        ''' Replace the recorded mtime 'stored' of 'path' with 'current' '''
        connection = self.database_mtime
        c = self.database_mtime_cursor
        values = (path, current, path, stored)
        c.execute("UPDATE npy SET file=?, mtime=? WHERE file==? AND mtime==?", values)
        connection.commit()
        print("'%s' updated mtime: %f" % (path, current))

    def _npy_mtime_delete(self, path):
        ''' Remove every record of 'path' '''
        connection = self.database_mtime
        c = self.database_mtime_cursor
        values = (path,)
        c.execute("DELETE FROM npy WHERE file==?", values)
        connection.commit()
        print("'%s' removed from mtime database" % path)
        return

    def _npy_mtime_check(self):
        '''
        Reconcile the tracking database with the files in 'data'.

        * A recorded file that is no longer in 'data' is removed from the
          database and its cache file is dropped.
        * A file in 'data' that has no record is added to the database.
        * A file whose mtime differs from the recorded one gets its record
          updated and its cache rebuilt.

        Requires _npy_mtime_populate() to have opened the database.
        '''
        c = self.database_mtime_cursor
        c.execute("SELECT file, mtime FROM npy")
        stored = {}
        for f, mtime in c.fetchall():
            stored[str(f)] = mtime

        current = {}
        for f in glob.glob(os.path.join(self.subdirs['data'], '*.*')):
            try:
                current[f] = os.path.getmtime(f)
            except OSError:
                # Removed between glob() and getmtime(); treat as absent.
                continue

        # Cache files that still belong to a present data file must survive
        # even if a stale record with the same base name is purged.
        live_caches = set(self._npy_cache_path(f) for f in current)

        for stored_file in sorted(stored):
            if stored_file in current:
                continue
            print("Missing data file: '%s'" % stored_file)
            self._npy_mtime_delete(stored_file)
            if self._npy_cache_path(stored_file) not in live_caches:
                self.npy_cache_drop(stored_file)

        for current_file in sorted(current):
            current_mtime = current[current_file]
            if current_file not in stored:
                self._npy_mtime_insert(current_file, current_mtime)
                continue
            stored_mtime = stored[current_file]
            if current_mtime != stored_mtime:
                print("'%s' differs" % current_file)
                self._npy_mtime_update(current_file, stored_mtime, current_mtime)
                print("Rebuilding numpy cache for '%s'" % current_file)
                self.npy_cache_build(current_file)
        return

    def npy_cache_build(self, path):
        '''
        Generate 'path' npy file in npy directory

        'path' is read with numpy.loadtxt() and written with numpy.save().
        Returns True on success.  Returns False when the file cannot be
        read, parsed or written; the reason is reported on stderr.
        '''
        try:
            temp = np.loadtxt(path)
            np.save(self._npy_cache_path(path), temp)
        except (IOError, OSError, ValueError) as err:
            sys.stderr.write("Unable to cache '%s': %s\n" % (path, err))
            return False
        return True

    def npy_cache_drop(self, path):
        '''
        Remove the cache file of 'path' from npy directory

        'path' may be the data file or the cache file itself.  Returns True
        when a file was removed, False when there was nothing to remove.
        '''
        # For security reasons, only files in the 'npy' directory are ever
        # unlinked: the target is rebuilt from the base name of 'path'.
        target = self._npy_cache_path(path)
        if not os.path.isfile(target):
            return False
        print("Unlinking '%s'" % (target))
        os.unlink(target)
        return True

    def npy_cache_drop_all(self):
        ''' Remove all npy files '''
        for f in glob.glob(os.path.join(self.subdirs['npy'], '*.npy')):
            os.unlink(f)

    def npy_cache_populate(self):
        '''
        Build the cache of every data file that has none, then reconcile
        the modification tracking database with the 'data' directory.
        '''
        files = glob.glob(os.path.join(self.subdirs['data'], '*.*'))
        # Decide up front what needs building so the "N of M" total is
        # stable for the whole run.
        pending = [f for f in files
                   if not os.path.exists(self._npy_cache_path(f))]
        file_total = len(pending)

        for file_current, f in enumerate(pending, 1):
            sys.stdout.write("Building cache %d of %d: '%s'... "
                             % (file_current, file_total, os.path.basename(f)))
            sys.stdout.flush()
            if self.npy_cache_build(f):
                print("success")
            else:
                print("failure")
        self._npy_mtime_populate()
        self._npy_mtime_check()


if __name__ == "__main__":
    pass