diff options
author | Joseph Hunkeler <jhunk@stsci.edu> | 2013-01-14 17:23:29 -0500 |
---|---|---|
committer | Joseph Hunkeler <jhunk@stsci.edu> | 2013-01-14 17:23:29 -0500 |
commit | 9bd908ccb75d145a134e10b5320585e28703275d (patch) | |
tree | 73f3e38049446cfacc27423c2d893fa213387022 /scilo | |
download | scilo-9bd908ccb75d145a134e10b5320585e28703275d.tar.gz |
Initial commit
Diffstat (limited to 'scilo')
-rw-r--r-- | scilo/__init__.py | 20 | ||||
-rw-r--r-- | scilo/scilo.py | 178 |
2 files changed, 198 insertions, 0 deletions
diff --git a/scilo/__init__.py b/scilo/__init__.py new file mode 100644 index 0000000..deebb5b --- /dev/null +++ b/scilo/__init__.py @@ -0,0 +1,20 @@ +''' +scilo - A scientific workflow and efficiency library +Copyright (C) 2012 Joseph Hunkeler <jhunkeler@gmail.com> + +This file is part of scilo. + +scilo is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +scilo is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with scilo. If not, see <http://www.gnu.org/licenses/>. +''' +from scilo import scilo diff --git a/scilo/scilo.py b/scilo/scilo.py new file mode 100644 index 0000000..a0d9c75 --- /dev/null +++ b/scilo/scilo.py @@ -0,0 +1,178 @@ +'''
+scilo - A scientific workflow and efficiency library
+Copyright (C) 2012 Joseph Hunkeler <jhunkeler@gmail.com>
+
+This file is part of scilo.
+
+scilo is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+scilo is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with scilo. If not, see <http://www.gnu.org/licenses/>.
+'''
+import numpy as np
+import sqlite3
+import os
+import glob
+
class scilo(object):
    '''
    Manage a dataset directory and its cache of numpy arrays.

    A dataset lives under one root directory holding three subdirectories:
    'data' (text data files), 'result', and 'npy' (binary .npy copies of the
    data files plus an sqlite database that records the modification time
    of every data file, so stale cache entries can be rebuilt or removed).

    Indexing an instance with a subdirectory name, e.g. dataset['data'],
    returns the absolute path of that subdirectory.
    '''

    def __init__(self, path):
        '''
        Open the dataset rooted at 'path', creating it when necessary.

        Missing parent directories and any missing subdirectory are created,
        so a partially built dataset is repaired when it is opened.
        '''
        self.database_mtime = None
        self.database_mtime_cursor = None
        self.path = os.path.abspath(path)
        self.subdirs = {
            'data': False,
            'result': False,
            'npy': False,
        }

        is_new = not os.path.exists(self.path)
        # One print call keeps the output identical on Python 2 and 3.
        print("Dataset '%s'... %s" % (path, "not found" if is_new else "found"))
        if is_new:
            print("Generating structure...")

        # Only values are reassigned below, so iterating the dict is safe.
        for key in self.subdirs:
            directory = os.path.join(self.path, key)
            self.subdirs[key] = directory
            if not os.path.isdir(directory):
                print("Creating directory: '%s'" % (directory))
                os.makedirs(directory)

    def __getitem__(self, key):
        ''' Return the absolute path of subdirectory 'key' '''
        return self.subdirs[key]

    def close(self):
        ''' Close the modification tracking database, if it is open '''
        if self.database_mtime is not None:
            self.database_mtime.close()
        self.database_mtime = None
        self.database_mtime_cursor = None

    def aggregate(self, globular):
        '''
        Move every file matching the glob pattern 'globular' into 'data'.

        Returns False when the pattern matches nothing, True otherwise.
        A file that cannot be moved is reported and skipped so that one
        failure does not abort the remaining moves.
        '''
        sources = glob.glob(globular)
        if not sources:
            return False

        for src in sources:
            dest = os.path.join(self.subdirs['data'], os.path.basename(src))
            try:
                # os.rename signals failure by raising, never by returning False.
                os.rename(os.path.abspath(src), dest)
            except OSError as err:
                print("Unable to move '%s': %s" % (src, err))
                continue

        return True

    def _npy_cache_path(self, path):
        ''' Return the npy cache file that corresponds to data file 'path' '''
        return os.path.join(self.subdirs['npy'], os.path.basename(path) + '.npy')

    def _npy_mtime_populate(self):
        '''
        Open the modification tracking database, creating it on first use.

        A freshly created database is seeded with the modification time of
        every file currently in 'data'. An already open connection is reused.
        '''
        if self.database_mtime is not None:
            return

        database = os.path.join(self.subdirs['npy'], 'npy_mtime.db')
        is_new = not os.path.exists(database)

        # A single connection serves both the cursor and the commits.
        connection = sqlite3.connect(database)
        self.database_mtime = connection
        self.database_mtime_cursor = connection.cursor()
        self.database_mtime_cursor.execute(
            "CREATE TABLE IF NOT EXISTS npy(file, mtime)")

        if is_new:
            print("Creating modification tracking database...")
            for f in glob.glob(os.path.join(self.subdirs['data'], "*.*")):
                mtime = os.path.getmtime(f)
                print("\tFile: %s\tmtime: %f" % (os.path.basename(f), mtime))
                self._npy_mtime_insert(f, mtime)
        connection.commit()

    def _npy_mtime_insert(self, path, mtime):
        ''' Record data file 'path' with modification time 'mtime' '''
        self.database_mtime_cursor.execute(
            "INSERT INTO npy VALUES (?,?)", (path, mtime))
        self.database_mtime.commit()

    def _npy_mtime_update(self, path, stored, current):
        ''' Replace the 'stored' modification time of 'path' with 'current' '''
        values = (path, current, path, stored)
        self.database_mtime_cursor.execute(
            "UPDATE npy SET file=?, mtime=? WHERE file==? AND mtime==?", values)
        self.database_mtime.commit()
        print("'%s' updated mtime: %f" % (path, current))

    def _npy_mtime_delete(self, path):
        ''' Forget data file 'path' '''
        self.database_mtime_cursor.execute(
            "DELETE FROM npy WHERE file==?", (path,))
        self.database_mtime.commit()
        print("'%s' removed from mtime database" % path)

    def _npy_mtime_check(self):
        '''
        Reconcile the tracking database with the files in 'data'.

        * A tracked file that no longer exists is forgotten and its cache
          file is removed.
        * A tracked file whose modification time changed gets its record
          updated and its cache rebuilt.
        * A file that is not tracked yet is added to the database.
        '''
        c = self.database_mtime_cursor
        c.execute("SELECT file, mtime FROM npy")
        stored = dict((str(f), mtime) for f, mtime in c.fetchall())

        current = {}
        for f in glob.glob(os.path.join(self.subdirs['data'], '*.*')):
            current[f] = os.path.getmtime(f)

        for stored_file, stored_mtime in sorted(stored.items()):
            if not os.path.exists(stored_file):
                print("Missing data file: '%s'" % stored_file)
                self._npy_mtime_delete(stored_file)
                self.npy_cache_drop(stored_file)
                continue

            current_mtime = current.get(stored_file)
            if current_mtime is None or current_mtime == stored_mtime:
                continue

            print("'%s' differs" % stored_file)
            self._npy_mtime_update(stored_file, stored_mtime, current_mtime)
            print("Rebuilding numpy cache for '%s'" % stored_file)
            self.npy_cache_build(stored_file)

        # Files that appeared after the database was created would otherwise
        # never be tracked, and later changes to them would go unnoticed.
        for new_file in sorted(set(current) - set(stored)):
            self._npy_mtime_insert(new_file, current[new_file])

    def npy_cache_build(self, path):
        '''
        Generate 'path' npy file in npy directory

        Returns True on success and False when the data file cannot be
        parsed or the cache file cannot be written.
        '''
        try:
            temp = np.loadtxt(path)
            np.save(self._npy_cache_path(path), temp)
        except (IOError, OSError, ValueError):
            # numpy reports failure by raising, never by returning False.
            return False
        return True

    def npy_cache_drop(self, path):
        '''
        Remove 'path' from npy directory

        'path' may name a data file or a cache file. Only its base name is
        used, so nothing outside the npy directory can ever be unlinked.
        Returns True when a cache file was removed, False otherwise.
        '''
        name = os.path.basename(path)
        candidates = [name + '.npy']
        if name.endswith('.npy'):
            # The caller handed over the cache file name itself.
            candidates.insert(0, name)

        for candidate in candidates:
            target = os.path.join(self.subdirs['npy'], candidate)
            if os.path.isfile(target):
                print("Unlinking '%s'" % (target))
                os.unlink(target)
                return True
        return False

    def npy_cache_drop_all(self):
        ''' Remove all npy files '''
        for f in glob.glob(os.path.join(self.subdirs['npy'], '*.npy')):
            os.unlink(f)

    def npy_cache_populate(self):
        '''
        Build the npy cache for every data file that lacks one, then bring
        the modification tracking database up to date.
        '''
        files = glob.glob(os.path.join(self.subdirs['data'], '*.*'))
        # Work out what is missing up front so the progress total is stable.
        pending = [f for f in files
                   if not os.path.exists(self._npy_cache_path(f))]
        file_total = len(pending)

        for file_current, f in enumerate(pending, 1):
            status = "success" if self.npy_cache_build(f) else "failure"
            print("Building cache %d of %d: '%s'... %s"
                  % (file_current, file_total, os.path.basename(f), status))

        self._npy_mtime_populate()
        self._npy_mtime_check()
+
+if __name__ == "__main__":  # running this module directly is a no-op
+ pass
|