'''
scilo - A scientific workflow and efficiency library 
Copyright (C) 2012  Joseph Hunkeler <jhunkeler@gmail.com>

This file is part of scilo.

scilo is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

scilo is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with scilo.  If not, see <http://www.gnu.org/licenses/>.
'''
import numpy as np
import sqlite3
import os
import glob

class scilo:
    '''
    A dataset directory with 'data', 'result' and 'npy' sub-directories.

    Text files under 'data' are mirrored as numpy '.npy' files under 'npy'.
    An SQLite database ('npy_mtime.db', kept in the 'npy' directory) records
    the modification time of every data file so that stale cache entries can
    be rebuilt and orphaned ones removed.

    Indexing an instance with a sub-directory name ('data', 'result', 'npy')
    returns the absolute path of that sub-directory.
    '''

    def __init__(self, path):
        '''
        Open the dataset rooted at 'path', creating whatever is missing.

        path -- dataset directory; it is created (with parents) if absent.

        Raises OSError if a directory cannot be created.
        '''
        self.database_mtime = None
        self.database_mtime_cursor = None
        self.path = os.path.abspath(path)
        self.subdirs = {
            'data': False,
            'result': False,
            'npy': False,
        }

        found = os.path.exists(self.path)
        # One complete line per status; the trailing-comma print trick only
        # suppresses the newline on Python 2.
        print("Dataset '%s'... %s" % (path, "found" if found else "not found"))
        if not found:
            os.makedirs(self.path)
            print("Generating structure...")

        # list() snapshots the keys, so this works on Python 2 and 3 alike.
        for key in list(self.subdirs):
            directory = os.path.join(self.path, key)
            self.subdirs[key] = directory
            # Also repairs an existing dataset that lost a sub-directory.
            if not os.path.isdir(directory):
                print("Creating directory: '%s'" % directory)
                os.mkdir(directory)

    def __getitem__(self, key):
        ''' Return the path of sub-directory 'key'; raises KeyError if unknown '''
        return self.subdirs[key]

    def close(self):
        ''' Close the modification tracking database, if it is open '''
        if self.database_mtime is not None:
            self.database_mtime.close()
            self.database_mtime = None
            self.database_mtime_cursor = None

    def aggregate(self, globular):
        '''
        Move every file matching the glob pattern 'globular' into 'data'.

        Returns False when the pattern matches nothing, True otherwise.
        Raises OSError if a file cannot be renamed.
        '''
        sources = glob.glob(globular)
        if not sources:
            return False
        for src in sources:
            dest = os.path.join(self.subdirs['data'], os.path.basename(src))
            # os.rename() returns None and reports failure by raising, so
            # there is no return value worth testing here.
            os.rename(os.path.abspath(src), dest)
        return True

    def _npy_cache_path(self, path):
        ''' Return the npy cache file name that belongs to data file 'path' '''
        return os.path.join(self.subdirs['npy'], os.path.basename(path) + '.npy')

    def _npy_mtime_populate(self):
        '''
        Open the modification tracking database, creating it when missing.

        A newly created database is seeded with the current modification
        time of every file in the 'data' directory.
        '''
        database = os.path.join(self.subdirs['npy'], 'npy_mtime.db')
        exists = os.path.exists(database)
        if exists and self.database_mtime is not None:
            # Already connected; do not leak another connection.
            return
        # The database file vanished underneath an open connection.
        self.close()

        if not exists:
            print("Creating modification tracking database...")
        # Exactly one connection: the stored cursor must belong to the
        # connection that commit() is later called on.
        connection = sqlite3.connect(database)
        self.database_mtime = connection
        self.database_mtime_cursor = connection.cursor()
        self.database_mtime_cursor.execute(
            "CREATE TABLE IF NOT EXISTS npy(file, mtime)")
        if not exists:
            for f in glob.glob(os.path.join(self.subdirs['data'], "*.*")):
                mtime = os.path.getmtime(f)
                print("\tFile: %s\tmtime: %f" % (os.path.basename(f), mtime))
                self._npy_mtime_insert(f, mtime)
        connection.commit()

    def _npy_mtime_insert(self, path, mtime):
        ''' Record data file 'path' with modification time 'mtime' '''
        self.database_mtime_cursor.execute(
            "INSERT INTO npy VALUES (?,?)", (path, mtime))
        self.database_mtime.commit()

    def _npy_mtime_update(self, path, stored, current):
        ''' Replace the 'stored' modification time of 'path' with 'current' '''
        values = (path, current, path, stored)
        self.database_mtime_cursor.execute(
            "UPDATE npy SET file=?, mtime=? WHERE file==? AND mtime==?", values)
        self.database_mtime.commit()
        print("'%s' updated mtime: %f" % (path, current))

    def _npy_mtime_delete(self, path):
        ''' Forget data file 'path' '''
        self.database_mtime_cursor.execute(
            "DELETE FROM npy WHERE file==?", (path,))
        self.database_mtime.commit()
        print("'%s' removed from mtime database" % path)

    def _npy_mtime_check(self):
        '''
        Reconcile the tracking database with the 'data' directory.

        Tracked files that no longer exist are forgotten and their cache
        file is removed; tracked files whose modification time changed get
        their cache rebuilt; data files not tracked yet are recorded.
        Requires _npy_mtime_populate() to have been called first.
        '''
        c = self.database_mtime_cursor
        c.execute("SELECT file, mtime FROM npy")
        # Fetch everything up front: the helpers below reuse this cursor.
        stored = [(str(f), mtime) for f, mtime in c.fetchall()]
        current = dict(
            (f, os.path.getmtime(f))
            for f in glob.glob(os.path.join(self.subdirs['data'], '*.*')))

        tracked = set()
        for stored_file, stored_mtime in stored:
            tracked.add(stored_file)
            # Checked per stored file, so stale rows are also found when
            # the data directory is empty.
            if not os.path.exists(stored_file):
                print("Missing data file: '%s'" % stored_file)
                self._npy_mtime_delete(stored_file)
                self.npy_cache_drop(stored_file)
                continue
            current_mtime = current.get(stored_file)
            if current_mtime is not None and current_mtime != stored_mtime:
                print("'%s' differs" % stored_file)
                self._npy_mtime_update(stored_file, stored_mtime, current_mtime)
                print("Rebuilding numpy cache for '%s'" % stored_file)
                self.npy_cache_build(stored_file)

        # Files that appeared after the database was created would otherwise
        # never be watched for changes.
        for f in sorted(current):
            if f not in tracked:
                self._npy_mtime_insert(f, current[f])

    def npy_cache_build(self, path):
        '''
        Generate 'path' npy file in npy directory.

        Returns True on success, False when the file cannot be read or
        parsed by numpy.loadtxt, or the cache file cannot be written.
        '''
        try:
            np.save(self._npy_cache_path(path), np.loadtxt(path))
        except (IOError, OSError, ValueError):
            # numpy signals failure by raising, not through a return value.
            return False
        return True

    def npy_cache_drop(self, path):
        '''
        Remove the cache file of 'path' from npy directory.

        'path' may name a data file (its cache entry is looked up by base
        name) or an existing '.npy' file inside the npy directory.
        '''
        name = os.path.basename(path)
        parent = os.path.dirname(path)
        npy_dir = os.path.abspath(self.subdirs['npy'])
        in_npy_dir = parent == 'npy' or os.path.abspath(parent) == npy_dir
        if not (in_npy_dir and name.endswith('.npy')):
            name += '.npy'
        # For security reasons, only files in the 'npy' directory are ever
        # unlinked: the target is always rebuilt from the base name.
        target = os.path.join(self.subdirs['npy'], name)
        if os.path.isfile(target):
            print("Unlinking '%s'" % (target))
            os.unlink(target)

    def npy_cache_drop_all(self):
        ''' Remove all npy files '''
        for f in glob.glob(os.path.join(self.subdirs['npy'], '*.npy')):
            os.unlink(f)

    def npy_cache_populate(self):
        '''
        Build the npy cache for every data file that has none yet, then
        reconcile the cache with the modification tracking database.
        '''
        files = glob.glob(os.path.join(self.subdirs['data'], '*.*'))
        pending = [f for f in files
                   if not os.path.exists(self._npy_cache_path(f))]
        total = len(pending)

        for index, f in enumerate(pending, 1):
            status = "success" if self.npy_cache_build(f) else "failure"
            print("Building cache %d of %d: '%s'... %s"
                  % (index, total, os.path.basename(f), status))
        self._npy_mtime_populate()
        self._npy_mtime_check()

if __name__ == "__main__":
    # No command-line behaviour is defined here; running this file as a
    # script is a no-op.
    pass