From fff1b9a37ce8279fd4eb3d529e7a4a6c3f6ed6ff Mon Sep 17 00:00:00 2001 From: Joseph Hunkeler Date: Tue, 15 Jan 2013 12:21:55 -0500 Subject: Breaking up and refactoring --- scilo/__init__.py | 3 + scilo/scilo.py | 167 +++++++++--------------------------------------------- 2 files changed, 29 insertions(+), 141 deletions(-) diff --git a/scilo/__init__.py b/scilo/__init__.py index deebb5b..27a0083 100644 --- a/scilo/__init__.py +++ b/scilo/__init__.py @@ -17,4 +17,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with scilo. If not, see <http://www.gnu.org/licenses/>. ''' +__all__ = ['scilo', 'npy'] from scilo import scilo +from scilo import npy +from database import s3 diff --git a/scilo/scilo.py b/scilo/scilo.py index a0d9c75..f984064 100644 --- a/scilo/scilo.py +++ b/scilo/scilo.py @@ -18,161 +18,46 @@ You should have received a copy of the GNU General Public License along with scilo. If not, see <http://www.gnu.org/licenses/>. ''' import numpy as np -import sqlite3 import os import glob +import npy class scilo: def __init__(self, path): - self.database_mtime = None - self.database_mtime_cursor = None + self.mtimedb = None self.path = os.path.abspath(path) - #self.subdirs = ['data', 'result', 'npz'] - self.subdirs = { - 'data':False, - 'result':False, - 'npy':False - } - print("Dataset '%s'..." 
% (path)), + self.settings = npy.settings(self.path) + exit(0) + if not os.path.exists(path): - print("not found") - os.mkdir(os.path.abspath(self.path)) - print("Generating structure...") - for key in self.subdirs.iterkeys(): - d = os.path.join(self.path, key) - self.subdirs[key] = d - print("Creating directory: '%s'" % (self.subdirs[key])) - os.mkdir(self.subdirs[key]) + os.mkdir(os.path.abspath(self.settings.path)) + for key in self.settings.directories.iterkeys(): + d = os.path.join(self.settings.path, key) + self.settings.directories[key] = d + print("\tCreating directory: '%s'" % (self.settings.directories[key])) + os.mkdir(self.settings.directories[key]) else: - print("found") - for key in self.subdirs.iterkeys(): + for key in self.settings.directories.iterkeys(): d = os.path.join(self.path, key) - self.subdirs[key] = d - + self.settings.directories[key] = d + def __getitem__(self, key): - return self.subdirs[key] + return self.settings.directories[key] - def aggregate(self, globular): + def aggregate(self, globular, move=True): sources = glob.glob(globular) if not sources: return False for src in sources: - dest = os.path.join(self.subdirs['data'], os.path.basename(src)) + dest = os.path.join(self.settings.directories['data'], os.path.basename(src)) src = os.path.abspath(src) - if os.rename(src, dest) == False: - continue - - return True - - def _npy_mtime_populate(self): - database = os.path.join(self.subdirs['npy'], 'npy_mtime.db') - if not os.path.exists(database): - print("Creating modification tracking database...") - connection = sqlite3.connect(database) - c = connection.cursor() - self.database_mtime = sqlite3.connect(database) - self.database_mtime_cursor = c - c.execute("CREATE TABLE npy(file, mtime)") - for f in glob.glob(os.path.join(self.subdirs['data'], "*.*")): - print("\tFile: %s\tmtime: %f" % (os.path.basename(f), os.path.getmtime(f))) - self._npy_mtime_insert(f, os.path.getmtime(f)) - connection.commit() - else: - connection = 
sqlite3.connect(database) - c = connection.cursor() - self.database_mtime = connection - self.database_mtime_cursor = c - return - - def _npy_mtime_insert(self, path, mtime): - connection = self.database_mtime - c = self.database_mtime_cursor - values = (path, mtime,) - c.execute("INSERT INTO npy VALUES (?,?)", values) - connection.commit() - - def _npy_mtime_update(self, path, stored, current): - connection = self.database_mtime - c = self.database_mtime_cursor - values = (path, current, path, stored) - c.execute("UPDATE npy SET file=?, mtime=? WHERE file==? AND mtime==?", values) - connection.commit() - print("'%s' updated mtime: %f" % (path, current)) - - def _npy_mtime_delete(self, path): - connection = self.database_mtime - c = self.database_mtime_cursor - values = (path,) - c.execute("DELETE FROM npy WHERE file==?", (values)) - connection.commit() - print("'%s' removed from mtime database" % path) - return - - def _npy_mtime_check(self): - mtime_stored = [] - mtime_current = [] - c = self.database_mtime_cursor - c.execute("SELECT file, mtime FROM npy") - files = glob.glob(os.path.join(self.subdirs['data'], '*.*')) - - for f in files: - mtime_current.append([f, os.path.getmtime(f)]) - - for f, mtime in c.fetchall(): - mtime_stored.append([str(f), mtime]) - - - for stored_file, stored_mtime in mtime_stored: - for current_file, current_mtime in mtime_current: - if not os.path.exists(stored_file) or not os.path.exists(current_file): - print("Missing data file: '%s'" % stored_file) - self._npy_mtime_delete(stored_file) - self.npy_cache_drop(stored_file) - break - if current_file == stored_file: - if current_mtime != stored_mtime: - print("'%s' differs" % current_file) - self._npy_mtime_update(current_file, stored_mtime, current_mtime) - print("Rebuilding numpy cache for '%s'" % current_file) - self.npy_cache_build(current_file) - return - - def npy_cache_build(self, path): - ''' Generate 'path' npy file in npy directory''' - temp = np.loadtxt(path) - if 
np.save(os.path.join(self.subdirs['npy'], os.path.basename(path)), temp) == False: - return False + if move: + if os.rename(src, dest) == False: + continue + else: + # It is text and the metadata barely matters. + import shutil + shutil.copy2(src, dest) + self.mtimedb = npy.mtimedb(**self.settings.directories) return True - - def npy_cache_drop(self, path): - ''' Remove 'path' from npy directory ''' - # For security reasons, you are only allowed to unlink files in the 'npy' directory - if os.path.dirname(path) == 'npy': - print("Unlinking '%s'" % (path)) - os.unlink(path) - - def npy_cache_drop_all(self): - ''' Remove all npy files ''' - files = glob.glob(os.path.join(self.subdirs['npy'], '*.npy')) - if files: - [os.unlink(f) for f in files] - - def npy_cache_populate(self): - files = glob.glob(os.path.join(self.subdirs['data'], '*.*')) - file_total = len(files) - file_current = 1 - - for f in files: - if os.path.exists(os.path.join(self.subdirs['npy'], os.path.basename(f) + '.npy')): - file_total -= 1 - continue - print("Building cache %d of %d: '%s'..." % (file_current, file_total, os.path.basename(f))), - if not self.npy_cache_build(f): - print("failure") - print("success") - file_current += 1 - self._npy_mtime_populate() - self._npy_mtime_check() - -if __name__ == "__main__": - pass + \ No newline at end of file -- cgit