diff --git a/README b/README deleted file mode 100644 index 07e65aa..0000000 --- a/README +++ /dev/null @@ -1,68 +0,0 @@ -================== - -Defines a %%cache cell magic in the IPython notebook to cache results -and outputs of long-lasting computations in a persistent pickle file. -Useful when some computations in a notebook are long and you want to -easily save the results in a file. - -Example -------- - -- `Example - notebook `__. - -Installation ------------- - -- ``pip install ipycache`` - -Usage ------ - -- In IPython: - - :: - - %load_ext ipycache - -- Then, create a cell with: - - :: - - %%cache mycache.pkl var1 var2 - var1 = 1 - var2 = 2 - -- When you execute this cell the first time, the code is executed, and - the variables ``var1`` and ``var2`` are saved in ``mycache.pkl`` in - the current directory along with the outputs. Rich display outputs - are only saved if you use the development version of IPython. When - you execute this cell again, the code is skipped, the variables are - loaded from the file and injected into the namespace, and the outputs - are restored in the notebook. - -- Alternatively use ``$file_name`` instead of ``mycache.pkl``, where - ``file_name`` is a variable holding the path to the file used for - caching. - -- Use the ``--force`` or ``-f`` option to force the cell's execution - and overwrite the file. - -- Use the ``--read`` or ``-r`` option to prevent the cell's execution - and always load the variables from the cache. An exception is raised - if the file does not exist. - -- Use the ``--cachedir`` or ``-d`` option to specify the cache - directory. You can specify a default directory in the IPython - configuration file in your profile (typically in - ``~\.ipython\profile_default\ipython_config.py``) by adding the - following line: - - :: - - c.CacheMagics.cachedir = "/path/to/mycache" - - If both a default cache directory and the ``--cachedir`` option are - given, the latter is used. 
- - diff --git a/README.md b/README.md index 7b7bf18..a1fafa6 100644 --- a/README.md +++ b/README.md @@ -45,3 +45,4 @@ Usage If both a default cache directory and the `--cachedir` option are given, the latter is used. + * Both raw and gzipped pickles are supported. Gzipped pickles are enabled when the filename ends with pkl.gz. Gzipped pickles are also enabled by specifying --backend pkl.gz diff --git a/README.rst b/README.rst deleted file mode 100644 index af694e7..0000000 --- a/README.rst +++ /dev/null @@ -1,68 +0,0 @@ -================== - -Defines a %%cache cell magic in the IPython notebook to cache results -and outputs of long-lasting computations in a persistent pickle file. -Useful when some computations in a notebook are long and you want to -easily save the results in a file. - -Example -------- - -- `Example - notebook `__. - -Installation ------------- - -- ``pip install ipycache`` - -Usage ------ - -- In IPython: - - :: - - %load_ext ipycache - -- Then, create a cell with: - - :: - - %%cache mycache.pkl var1 var2 - var1 = 1 - var2 = 2 - -- When you execute this cell the first time, the code is executed, and - the variables ``var1`` and ``var2`` are saved in ``mycache.pkl`` in - the current directory along with the outputs. Rich display outputs - are only saved if you use the development version of IPython. When - you execute this cell again, the code is skipped, the variables are - loaded from the file and injected into the namespace, and the outputs - are restored in the notebook. - -- Alternatively use ``$file_name`` instead of ``mycache.pkl``, where - ``file_name`` is a variable holding the path to the file used for - caching. - -- Use the ``--force`` or ``-f`` option to force the cell's execution - and overwrite the file. - -- Use the ``--read`` or ``-r`` option to prevent the cell's execution - and always load the variables from the cache. An exception is raised - if the file does not exist. 
- -- Use the ``--cachedir`` or ``-d`` option to specify the cache - directory. You can specify a default directory in the IPython - configuration file in your profile (typically in - ``~\.ipython\profile_default\ipython_config.py``) by adding the - following line: - - :: - - c.CacheMagics.cachedir = "/path/to/mycache" - - If both a default cache directory and the ``--cachedir`` option are - given, the latter is used. - - diff --git a/ipycache.py b/ipycache.py index 5afef5b..95f3ee9 100644 --- a/ipycache.py +++ b/ipycache.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -"""Defines a %%cache cell magic in the notebook to persistent-cache results of +"""Defines a %%cache cell magic in the notebook to persistent-cache results of long-lasting computations. """ @@ -9,6 +9,7 @@ # Stdlib import inspect, os, sys, textwrap, re +import gzip # Our own from IPython.config.configurable import Configurable @@ -24,7 +25,7 @@ # Six utility functions for Python 2/3 compatibility #------------------------------------------------------------------------------ # Author: "Benjamin Peterson " - + PY2 = sys.version_info[0] == 2 PY3 = sys.version_info[0] == 3 @@ -32,11 +33,11 @@ import pickle, builtins from io import StringIO _iteritems = "items" - + exec_ = getattr(builtins, "exec") else: import cPickle as pickle - from StringIO import StringIO + from StringIO import StringIO _iteritems = "iteritems" def exec_(_code_, _globs_=None, _locs_=None): @@ -50,7 +51,7 @@ def exec_(_code_, _globs_=None, _locs_=None): elif _locs_ is None: _locs_ = _globs_ exec("""exec _code_ in _globs_, _locs_""") - + def iteritems(d, **kw): """Return an iterator over the (key, value) pairs of a dictionary.""" return iter(getattr(d, _iteritems)(**kw)) @@ -95,64 +96,76 @@ def do_save(path, force=False, read=False): if force and read: raise ValueError(("The 'force' and 'read' options are " "mutually exclusive.")) - + # Execute the cell and save the variables. 
return force or (not read and not os.path.exists(path)) - -def load_vars(path, vars): - """Load variables from a pickle file. - + +def load_vars(path, vars, backend='pkl'): + """Load variables from a file. + Arguments: - - * path: the path to the pickle file. + + * path: the path to the file. * vars: a list of variable names. - + Returns: - + * cache: a dictionary {var_name: var_value}. - + """ - with open(path, 'rb') as f: - # Load the variables from the cache. - try: - cache = pickle.load(f) - except EOFError as e: - cache={} - #raise IOError(str(e)) - - # Check that all requested variables could be loaded successfully - # from the cache. - missing_vars = sorted(set(vars) - set(cache.keys())) - if missing_vars: - raise ValueError(("The following variables could not be loaded " - "from the cache: {0:s}").format( - ', '.join(["'{0:s}'".format(var) for var in missing_vars]))) - additional_vars = sorted(set(cache.keys()) - set(vars)) - for hidden_variable in '_captured_io', '_cell_md5': + if backend in ('pkl', 'pkl.gz'): + if backend=='pkl': + open_fn = open + else: + open_fn = gzip.open + + with open_fn(path, 'rb') as f: + # Load the variables from the cache. try: - additional_vars.remove(hidden_variable) - except ValueError: - pass - if additional_vars: - raise ValueError("The following variables were present in the cache, " - "but removed from the storage request: {0:s}".format( - ', '.join(["'{0:s}'".format(var) for var in additional_vars]))) - - return cache - -def save_vars(path, vars_d): + cache = pickle.load(f) + except EOFError as e: + cache={} + #raise IOError(str(e)) + + # Check that all requested variables could be loaded successfully + # from the cache. 
+ missing_vars = sorted(set(vars) - set(cache.keys())) + if missing_vars: + raise ValueError(("The following variables could not be loaded " + "from the cache: {0:s}").format( + ', '.join(["'{0:s}'".format(var) for var in missing_vars]))) + additional_vars = sorted(set(cache.keys()) - set(vars)) + for hidden_variable in '_captured_io', '_cell_md5': + try: + additional_vars.remove(hidden_variable) + except ValueError: + pass + if additional_vars: + raise ValueError("The following variables were present in the cache, " + "but removed from the storage request: {0:s}".format( + ', '.join(["'{0:s}'".format(var) for var in additional_vars]))) + return cache + else: + raise ValueError('Unknown storage backend {0}'.format(backend)) + +def save_vars(path, vars_d, backend='pkl'): """Save variables into a pickle file. - + Arguments: - + * path: the path to the pickle file. * vars_d: a dictionary {var_name: var_value}. - + """ - with open(path, 'wb') as f: - dump(vars_d, f) - - + if backend=='pkl': + with open(path, 'wb') as f: + pickle.dump(vars_d, f) + elif backend=='pkl.gz': + with gzip.open(path, 'wb') as f: + pickle.dump(vars_d, f) + else: + raise ValueError('Unknown storage backend {0}'.format(backend)) + #------------------------------------------------------------------------------ # CapturedIO #------------------------------------------------------------------------------ @@ -162,7 +175,7 @@ def save_captured_io(io): stderr=StringIO(io._stderr.getvalue()), outputs=getattr(io, '_outputs', []), # Only IPython master has this ) - + def load_captured_io(captured_io): try: return CapturedIO(captured_io.get('stdout', None), @@ -173,7 +186,7 @@ def load_captured_io(captured_io): return CapturedIO(captured_io.get('stdout', None), captured_io.get('stderr', None), ) - + class myStringIO(StringIO): """class to simultaneously capture and output""" def __init__(self, out=None, buf=""): @@ -192,47 +205,47 @@ class capture_output_and_print(object): stdout = True stderr = True 
display = True - + def __init__(self, stdout=True, stderr=True, display=True): self.stdout = stdout self.stderr = stderr self.display = display self.shell = None - + def __enter__(self): from IPython.core.getipython import get_ipython from IPython.core.displaypub import CapturingDisplayPublisher - + self.sys_stdout = sys.stdout self.sys_stderr = sys.stderr - + if self.display: self.shell = get_ipython() if self.shell is None: self.save_display_pub = None self.display = False - + stdout = stderr = outputs = None if self.stdout: #stdout = sys.stdout = StringIO() stdout = sys.stdout = myStringIO(out=IPython.utils.io.stdout) if self.stderr: #stderr = sys.stderr = StringIO() - stderr = sys.stderr = myStringIO(out=self.sys_stderr) + stderr = sys.stderr = myStringIO(out=self.sys_stderr) if self.display: self.save_display_pub = self.shell.display_pub self.shell.display_pub = CapturingDisplayPublisher() outputs = self.shell.display_pub.outputs return CapturedIO(stdout, stderr, outputs) - + def __exit__(self, exc_type, exc_value, traceback): sys.stdout = self.sys_stdout sys.stderr = self.sys_stderr if self.display and self.shell: self.shell.display_pub = self.save_display_pub - -#----------------------------------------------------------------------------- + +#------------------------------------------------------------------------------ # %%cache Magics #------------------------------------------------------------------------------ def cache(cell, path, vars=[], @@ -240,14 +253,24 @@ def cache(cell, path, vars=[], # without IPython, by giving mock functions here instead of IPython # methods. 
ip_user_ns={}, ip_run_cell=None, ip_push=None, ip_clear_output=lambda : None, - force=False, read=False, verbose=True): - + force=False, read=False, verbose=True, backend=None): + if not path: raise ValueError("The path needs to be specified as a first argument.") - + path = os.path.abspath(path) cell_md5 = hashlib.md5(cell.encode()).hexdigest() - + + # infer storage backend from path if None + if backend is None: + # try to guess, but default is pickle + if path.endswith('.pkl') or path.endswith('.pickle'): + backend = 'pkl' + elif path.endswith('.pkl.gz') or path.endswith('.pickle.gz'): + backend = 'pkl.gz' + else: + backend = 'pkl' + if do_save(path, force=force, read=read): # Capture the outputs of the cell. with capture_output_and_print() as io: @@ -262,7 +285,7 @@ def cache(cell, path, vars=[], cached = {var: ip_user_ns[var] for var in vars} except KeyError: vars_missing = set(vars) - set(ip_user_ns.keys()) - vars_missing_str = ', '.join(["'{0:s}'".format(_) + vars_missing_str = ', '.join(["'{0:s}'".format(_) for _ in vars_missing]) raise ValueError(("Variable(s) {0:s} could not be found in the " "interactive namespace").format(vars_missing_str)) @@ -270,19 +293,19 @@ def cache(cell, path, vars=[], cached['_captured_io'] = save_captured_io(io) cached['_cell_md5'] = cell_md5 # Save the cache in the pickle file. - save_vars(path, cached) + save_vars(path, cached, backend) ip_clear_output() # clear away the temporary output and replace with the saved output (ideal?) if verbose: print("[Saved variables '{0:s}' to file '{1:s}'.]".format( ', '.join(vars), path)) - - # If the cache file exists, and no --force mode, load the requested + + # If the cache file exists, and no --force mode, load the requested # variables from the specified file into the interactive namespace. else: # Load the variables from cache in inject them in the namespace. 
force_recalc = False try: - cached = load_vars(path, vars) + cached = load_vars(path, vars, backend) except ValueError as e: if 'The following variables' in str(e): if read: @@ -294,7 +317,7 @@ def cache(cell, path, vars=[], if not '_cell_md5' in cached or cell_md5 != cached['_cell_md5']: force_recalc = True if force_recalc and not read: - return cache(cell, path, vars, ip_user_ns, ip_run_cell, ip_push, ip_clear_output, True, read, verbose) + return cache(cell, path, vars, ip_user_ns, ip_run_cell, ip_push, ip_clear_output, True, read, verbose, backend=backend) # Handle the outputs separately. io = load_captured_io(cached.get('_captured_io', {})) # Push the remaining variables in the namespace. @@ -305,22 +328,22 @@ def cache(cell, path, vars=[], # Display the outputs, whether they come from the cell's execution # or the pickle file. - io() # output is only printed when loading file + io() # output is only printed when loading file + + - - @magics_class class CacheMagics(Magics, Configurable): """Variable caching. Provides the %cache magic.""" - + cachedir = Unicode('', config=True) - + def __init__(self, shell=None): Magics.__init__(self, shell) Configurable.__init__(self, config=shell.config) - + @magic_arguments.magic_arguments() @magic_arguments.argument( 'to', nargs=1, type=str, @@ -347,21 +370,25 @@ def __init__(self, shell=None): help=("Always read from the file and prevent the cell's execution, " "raising an error if the file does not exist.") ) + @magic_arguments.argument( + '-b', '--backend', + help=("Storage backend: 'pkl', 'pkl.gz'") + ) @cell_magic def cache(self, line, cell): """Cache user variables in a file, and skip the cell if the cached variables exist. - + Usage: - + %%cache myfile.pkl var1 var2 - # If myfile.pkl doesn't exist, this cell is executed and + # If myfile.pkl doesn't exist, this cell is executed and # var1 and var2 are saved in this file. 
# Otherwise, the cell is skipped and these variables are # injected from the file to the interactive namespace. var1 = ... var2 = ... - + """ ip = self.shell args = magic_arguments.parse_argstring(self.cache, line) @@ -382,10 +409,12 @@ def cache(self, line, cell): except: pass path = os.path.join(cachedir, path) - cache(cell, path, vars=vars, + + cache(cell, path, vars=vars, force=args.force, verbose=not args.silent, read=args.read, + backend=args.backend, # IPython methods - ip_user_ns=ip.user_ns, + ip_user_ns=ip.user_ns, ip_run_cell=ip.run_cell, ip_push=ip.push, ip_clear_output=clear_output @@ -394,4 +423,4 @@ def cache(self, line, cell): def load_ipython_extension(ip): """Load the extension in IPython.""" ip.register_magics(CacheMagics) - +