From fa7a03a405d78bf28c16adc33a6305173a6a58c6 Mon Sep 17 00:00:00 2001 From: Daniel Maturana Date: Fri, 6 Feb 2015 19:46:01 -0500 Subject: [PATCH 1/5] Added gzipped pickle support. --- ipycache.py | 172 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 100 insertions(+), 72 deletions(-) diff --git a/ipycache.py b/ipycache.py index b4676e1..c731942 100644 --- a/ipycache.py +++ b/ipycache.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -"""Defines a %%cache cell magic in the notebook to persistent-cache results of +"""Defines a %%cache cell magic in the notebook to persistent-cache results of long-lasting computations. """ @@ -9,6 +9,7 @@ # Stdlib import inspect, os, sys, textwrap, re +import gzip # Our own from IPython.config.configurable import Configurable @@ -23,7 +24,7 @@ # Six utility functions for Python 2/3 compatibility #------------------------------------------------------------------------------ # Author: "Benjamin Peterson " - + PY2 = sys.version_info[0] == 2 PY3 = sys.version_info[0] == 3 @@ -31,11 +32,11 @@ import pickle, builtins from io import StringIO _iteritems = "items" - + exec_ = getattr(builtins, "exec") else: import cPickle as pickle - from StringIO import StringIO + from StringIO import StringIO _iteritems = "iteritems" def exec_(_code_, _globs_=None, _locs_=None): @@ -49,7 +50,7 @@ def exec_(_code_, _globs_=None, _locs_=None): elif _locs_ is None: _locs_ = _globs_ exec("""exec _code_ in _globs_, _locs_""") - + def iteritems(d, **kw): """Return an iterator over the (key, value) pairs of a dictionary.""" return iter(getattr(d, _iteritems)(**kw)) @@ -85,53 +86,66 @@ def do_save(path, force=False, read=False): if force and read: raise ValueError(("The 'force' and 'read' options are " "mutually exclusive.")) - + # Execute the cell and save the variables. return force or (not read and not os.path.exists(path)) - -def load_vars(path, vars): - """Load variables from a pickle file. 
- + +def load_vars(path, vars, backend): + """Load variables from a file. + Arguments: - - * path: the path to the pickle file. + + * path: the path to the file. * vars: a list of variable names. - + Returns: - + * cache: a dictionary {var_name: var_value}. - + """ - with open(path, 'rb') as f: - # Load the variables from the cache. - try: - cache = pickle.load(f) - except EOFError as e: - raise IOError(e.message) - - # Check that all requested variables could be loaded successfully - # from the cache. - missing_vars = sorted(set(vars) - set(cache.keys())) - if missing_vars: - raise ValueError(("The following variables could not be loaded " - "from the cache: {0:s}").format( - ', '.join(["'{0:s}'".format(var) for var in missing_vars]))) - - return cache - -def save_vars(path, vars_d): + if backend in ('pkl', 'pkl.gz'): + if backend=='pkl': + open_fn = open + else: + open_fn = gzip.open + + with open_fn(path, 'rb') as f: + # Load the variables from the cache. + try: + cache = pickle.load(f) + except EOFError as e: + raise IOError(e.message) + + # Check that all requested variables could be loaded successfully + # from the cache. + missing_vars = sorted(set(vars) - set(cache.keys())) + if missing_vars: + raise ValueError(("The following variables could not be loaded " + "from the cache: {0:s}").format( + ', '.join(["'{0:s}'".format(var) for var in missing_vars]))) + + return cache + else: + raise ValueError('Unknown storage backend {0}'.format(backend)) + +def save_vars(path, vars_d, backend): """Save variables into a pickle file. - + Arguments: - + * path: the path to the pickle file. * vars_d: a dictionary {var_name: var_value}. 
- + """ - with open(path, 'wb') as f: - pickle.dump(vars_d, f) - - + if backend=='pkl': + with open(path, 'wb') as f: + pickle.dump(vars_d, f) + elif backend=='pkl.gz': + with gzip.open(path, 'wb') as f: + pickle.dump(vars_d, f) + else: + raise ValueError('Unknown storage backend {0}'.format(backend)) + #------------------------------------------------------------------------------ # CapturedIO #------------------------------------------------------------------------------ @@ -141,7 +155,7 @@ def save_captured_io(io): stderr=StringIO(io._stderr.getvalue()), outputs=getattr(io, '_outputs', []), # Only IPython master has this ) - + def load_captured_io(captured_io): try: return CapturedIO(captured_io.get('stdout', None), @@ -152,7 +166,7 @@ def load_captured_io(captured_io): return CapturedIO(captured_io.get('stdout', None), captured_io.get('stderr', None), ) - + class myStringIO(StringIO): """class to simultaneously capture and output""" def __init__(self, out=None, buf=""): @@ -171,46 +185,46 @@ class capture_output_and_print(object): stdout = True stderr = True display = True - + def __init__(self, stdout=True, stderr=True, display=True): self.stdout = stdout self.stderr = stderr self.display = display self.shell = None - + def __enter__(self): from IPython.core.getipython import get_ipython from IPython.core.displaypub import CapturingDisplayPublisher - + self.sys_stdout = sys.stdout self.sys_stderr = sys.stderr - + if self.display: self.shell = get_ipython() if self.shell is None: self.save_display_pub = None self.display = False - + stdout = stderr = outputs = None if self.stdout: #stdout = sys.stdout = StringIO() stdout = sys.stdout = myStringIO(out=IPython.utils.io.stdout) if self.stderr: #stderr = sys.stderr = StringIO() - stderr = sys.stderr = myStringIO(out=self.sys_stderr) + stderr = sys.stderr = myStringIO(out=self.sys_stderr) if self.display: self.save_display_pub = self.shell.display_pub self.shell.display_pub = CapturingDisplayPublisher() outputs = 
self.shell.display_pub.outputs return CapturedIO(stdout, stderr, outputs) - + def __exit__(self, exc_type, exc_value, traceback): sys.stdout = self.sys_stdout sys.stderr = self.sys_stderr if self.display and self.shell: self.shell.display_pub = self.save_display_pub - + #------------------------------------------------------------------------------ # %%cache Magics #------------------------------------------------------------------------------ @@ -219,13 +233,13 @@ def cache(cell, path, vars=[], # without IPython, by giving mock functions here instead of IPython # methods. ip_user_ns={}, ip_run_cell=None, ip_push=None, ip_clear_output=lambda : None, - force=False, read=False, verbose=True): - + force=False, read=False, verbose=True, backend=None): + if not path: raise ValueError("The path needs to be specified as a first argument.") - + path = os.path.abspath(path) - + if do_save(path, force=force, read=read): # Capture the outputs of the cell. with capture_output_and_print() as io: @@ -240,24 +254,24 @@ def cache(cell, path, vars=[], cache = {var: ip_user_ns[var] for var in vars} except KeyError: vars_missing = set(vars) - set(ip_user_ns.keys()) - vars_missing_str = ', '.join(["'{0:s}'".format(_) + vars_missing_str = ', '.join(["'{0:s}'".format(_) for _ in vars_missing]) raise ValueError(("Variable(s) {0:s} could not be found in the " "interactive namespace").format(vars_missing_str)) # Save the outputs in the cache. cache['_captured_io'] = save_captured_io(io) # Save the cache in the pickle file. - save_vars(path, cache) + save_vars(path, cache, backend) ip_clear_output() # clear away the temporary output and replace with the saved output (ideal?) if verbose: print("[Saved variables '{0:s}' to file '{1:s}'.]".format( ', '.join(vars), path)) - - # If the cache file exists, and no --force mode, load the requested + + # If the cache file exists, and no --force mode, load the requested # variables from the specified file into the interactive namespace. 
else: # Load the variables from cache in inject them in the namespace. - cache = load_vars(path, vars) + cache = load_vars(path, vars, backend) # Handle the outputs separately. io = load_captured_io(cache.get('_captured_io', {})) # Push the remaining variables in the namespace. @@ -268,22 +282,22 @@ def cache(cell, path, vars=[], # Display the outputs, whether they come from the cell's execution # or the pickle file. - io() # output is only printed when loading file + io() # output is only printed when loading file + + - - @magics_class class CacheMagics(Magics, Configurable): """Variable caching. Provides the %cache magic.""" - + cachedir = Unicode('', config=True) - + def __init__(self, shell=None): Magics.__init__(self, shell) Configurable.__init__(self, config=shell.config) - + @magic_arguments.magic_arguments() @magic_arguments.argument( 'to', nargs=1, type=str, @@ -310,21 +324,25 @@ def __init__(self, shell=None): help=("Always read from the file and prevent the cell's execution, " "raising an error if the file does not exist.") ) + @magic_arguments.argument( + '-b', '--backend', + help=("Storage backend: 'pkl', 'pkl.gz'") + ) @cell_magic def cache(self, line, cell): """Cache user variables in a file, and skip the cell if the cached variables exist. - + Usage: - + %%cache myfile.pkl var1 var2 - # If myfile.pkl doesn't exist, this cell is executed and + # If myfile.pkl doesn't exist, this cell is executed and # var1 and var2 are saved in this file. # Otherwise, the cell is skipped and these variables are # injected from the file to the interactive namespace. var1 = ... var2 = ... 
- + """ ip = self.shell args = magic_arguments.parse_argstring(self.cache, line) @@ -345,10 +363,20 @@ def cache(self, line, cell): except: pass path = os.path.join(cachedir, path) - cache(cell, path, vars=vars, + # infer storage backend from path if None + if args.backend is None: + # try to guess, but default is pickle + if path.endswith('.pkl') or path.endswith('.pickle'): + backend = 'pkl' + elif path.endswith('.pkl.gz') or path.endswith('.pickle.gz'): + backend = 'pkl.gz' + else: + backend = 'pkl' + cache(cell, path, vars=vars, force=args.force, verbose=not args.silent, read=args.read, + backend=backend, # IPython methods - ip_user_ns=ip.user_ns, + ip_user_ns=ip.user_ns, ip_run_cell=ip.run_cell, ip_push=ip.push, ip_clear_output=clear_output @@ -357,4 +385,4 @@ def cache(self, line, cell): def load_ipython_extension(ip): """Load the extension in IPython.""" ip.register_magics(CacheMagics) - + From a35fdd21b27fd7c6830be9bffe541b892a2d8dbb Mon Sep 17 00:00:00 2001 From: Vasco Tenner Date: Fri, 19 Feb 2016 10:30:20 +0100 Subject: [PATCH 2/5] Added gzip support to readme and cleaned up old readme files --- README | 68 ------------------------------------------------------ README.md | 1 + README.rst | 68 ------------------------------------------------------ 3 files changed, 1 insertion(+), 136 deletions(-) delete mode 100644 README delete mode 100644 README.rst diff --git a/README b/README deleted file mode 100644 index 07e65aa..0000000 --- a/README +++ /dev/null @@ -1,68 +0,0 @@ -================== - -Defines a %%cache cell magic in the IPython notebook to cache results -and outputs of long-lasting computations in a persistent pickle file. -Useful when some computations in a notebook are long and you want to -easily save the results in a file. - -Example -------- - -- `Example - notebook `__. 
- -Installation ------------- - -- ``pip install ipycache`` - -Usage ------ - -- In IPython: - - :: - - %load_ext ipycache - -- Then, create a cell with: - - :: - - %%cache mycache.pkl var1 var2 - var1 = 1 - var2 = 2 - -- When you execute this cell the first time, the code is executed, and - the variables ``var1`` and ``var2`` are saved in ``mycache.pkl`` in - the current directory along with the outputs. Rich display outputs - are only saved if you use the development version of IPython. When - you execute this cell again, the code is skipped, the variables are - loaded from the file and injected into the namespace, and the outputs - are restored in the notebook. - -- Alternatively use ``$file_name`` instead of ``mycache.pkl``, where - ``file_name`` is a variable holding the path to the file used for - caching. - -- Use the ``--force`` or ``-f`` option to force the cell's execution - and overwrite the file. - -- Use the ``--read`` or ``-r`` option to prevent the cell's execution - and always load the variables from the cache. An exception is raised - if the file does not exist. - -- Use the ``--cachedir`` or ``-d`` option to specify the cache - directory. You can specify a default directory in the IPython - configuration file in your profile (typically in - ``~\.ipython\profile_default\ipython_config.py``) by adding the - following line: - - :: - - c.CacheMagics.cachedir = "/path/to/mycache" - - If both a default cache directory and the ``--cachedir`` option are - given, the latter is used. - - diff --git a/README.md b/README.md index 7b7bf18..a1fafa6 100644 --- a/README.md +++ b/README.md @@ -45,3 +45,4 @@ Usage If both a default cache directory and the `--cachedir` option are given, the latter is used. + * Both raw and gzipped pickles are supported. Gzipped pickles are enabled when the filename ends in pkl.gz. 
Gzipped pickles are also enabled by specifying --backend pkl.gz diff --git a/README.rst b/README.rst deleted file mode 100644 index af694e7..0000000 --- a/README.rst +++ /dev/null @@ -1,68 +0,0 @@ -================== - -Defines a %%cache cell magic in the IPython notebook to cache results -and outputs of long-lasting computations in a persistent pickle file. -Useful when some computations in a notebook are long and you want to -easily save the results in a file. - -Example -------- - -- `Example - notebook `__. - -Installation ------------- - -- ``pip install ipycache`` - -Usage ------ - -- In IPython: - - :: - - %load_ext ipycache - -- Then, create a cell with: - - :: - - %%cache mycache.pkl var1 var2 - var1 = 1 - var2 = 2 - -- When you execute this cell the first time, the code is executed, and - the variables ``var1`` and ``var2`` are saved in ``mycache.pkl`` in - the current directory along with the outputs. Rich display outputs - are only saved if you use the development version of IPython. When - you execute this cell again, the code is skipped, the variables are - loaded from the file and injected into the namespace, and the outputs - are restored in the notebook. - -- Alternatively use ``$file_name`` instead of ``mycache.pkl``, where - ``file_name`` is a variable holding the path to the file used for - caching. - -- Use the ``--force`` or ``-f`` option to force the cell's execution - and overwrite the file. - -- Use the ``--read`` or ``-r`` option to prevent the cell's execution - and always load the variables from the cache. An exception is raised - if the file does not exist. - -- Use the ``--cachedir`` or ``-d`` option to specify the cache - directory. 
You can specify a default directory in the IPython - configuration file in your profile (typically in - ``~\.ipython\profile_default\ipython_config.py``) by adding the - following line: - - :: - - c.CacheMagics.cachedir = "/path/to/mycache" - - If both a default cache directory and the ``--cachedir`` option are - given, the latter is used. - - From ac7691f37f9de296dd5f3962c29c80d64a0ac4c0 Mon Sep 17 00:00:00 2001 From: Vasco Tenner Date: Fri, 19 Feb 2016 12:54:22 +0100 Subject: [PATCH 3/5] Added default arguments to save_vars and load_vars such that API is backwards compatible --- ipycache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ipycache.py b/ipycache.py index 72cb023..8071d8e 100644 --- a/ipycache.py +++ b/ipycache.py @@ -100,7 +100,7 @@ def do_save(path, force=False, read=False): # Execute the cell and save the variables. return force or (not read and not os.path.exists(path)) -def load_vars(path, vars, backend): +def load_vars(path, vars, backend='pkl'): """Load variables from a file. Arguments: @@ -148,7 +148,7 @@ def load_vars(path, vars, backend): else: raise ValueError('Unknown storage backend {0}'.format(backend)) -def save_vars(path, vars_d, backend): +def save_vars(path, vars_d, backend='pkl'): """Save variables into a pickle file. 
Arguments: From 79d30672b63e6b3df79b5dade27f6c9cb4baa0ac Mon Sep 17 00:00:00 2001 From: Vasco Tenner Date: Fri, 19 Feb 2016 12:56:29 +0100 Subject: [PATCH 4/5] Removed debugging print statement --- ipycache.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ipycache.py b/ipycache.py index 8071d8e..a148943 100644 --- a/ipycache.py +++ b/ipycache.py @@ -159,7 +159,6 @@ def save_vars(path, vars_d, backend='pkl'): """ if backend=='pkl': with open(path, 'wb') as f: - print vars_d pickle.dump(vars_d, f) elif backend=='pkl.gz': with gzip.open(path, 'wb') as f: From 22fdfb81eec26949820eebd348b9db35f475d77c Mon Sep 17 00:00:00 2001 From: Vasco Tenner Date: Fri, 19 Feb 2016 13:11:01 +0100 Subject: [PATCH 5/5] Moved backend selection from filename to def cache --- ipycache.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/ipycache.py b/ipycache.py index a148943..95f3ee9 100644 --- a/ipycache.py +++ b/ipycache.py @@ -261,6 +261,16 @@ def cache(cell, path, vars=[], path = os.path.abspath(path) cell_md5 = hashlib.md5(cell.encode()).hexdigest() + # infer storage backend from path if None + if backend is None: + # try to guess, but default is pickle + if path.endswith('.pkl') or path.endswith('.pickle'): + backend = 'pkl' + elif path.endswith('.pkl.gz') or path.endswith('.pickle.gz'): + backend = 'pkl.gz' + else: + backend = 'pkl' + if do_save(path, force=force, read=read): # Capture the outputs of the cell. with capture_output_and_print() as io: @@ -307,7 +317,7 @@ def cache(cell, path, vars=[], if not '_cell_md5' in cached or cell_md5 != cached['_cell_md5']: force_recalc = True if force_recalc and not read: - return cache(cell, path, vars, ip_user_ns, ip_run_cell, ip_push, ip_clear_output, True, read, verbose) + return cache(cell, path, vars, ip_user_ns, ip_run_cell, ip_push, ip_clear_output, True, read, verbose, backend=backend) # Handle the outputs separately. 
io = load_captured_io(cached.get('_captured_io', {})) # Push the remaining variables in the namespace. @@ -399,21 +409,10 @@ def cache(self, line, cell): except: pass path = os.path.join(cachedir, path) - # infer storage backend from path if None - if args.backend is None: - # try to guess, but default is pickle - if path.endswith('.pkl') or path.endswith('.pickle'): - backend = 'pkl' - elif path.endswith('.pkl.gz') or path.endswith('.pickle.gz'): - backend = 'pkl.gz' - else: - backend = 'pkl' - else: - backend = args.backend cache(cell, path, vars=vars, force=args.force, verbose=not args.silent, read=args.read, - backend=backend, + backend=args.backend, # IPython methods ip_user_ns=ip.user_ns, ip_run_cell=ip.run_cell,