From c17338e903583eae2a3125320c0f1a61dac1b2ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jard=C3=B3n?= Date: Wed, 4 May 2016 17:44:32 +0100 Subject: [PATCH 1/3] ybd/utils.py: Add make_xztar_archive() To use lzma compression instead of gzip --- ybd/utils.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/ybd/utils.py b/ybd/utils.py index 32815b2..8d036bf 100644 --- a/ybd/utils.py +++ b/ybd/utils.py @@ -15,6 +15,10 @@ # =*= License: GPL-2 =*= import gzip +try: + import lzma +except ImportError: + from backports import lzma import tarfile import contextlib import os @@ -227,6 +231,26 @@ def _process_list(srcdir, destdir, filelist, actionfunc): ' type.' % srcpath) +def make_xztar_archive(base_name, root_dir): + '''Make a xz tar archive of contents of 'root_dir'. + ''' + def add_directory_to_tarfile(f_tar, dir_name, dir_arcname): + for filename in sorted(os.listdir(dir_name)): + name = os.path.join(dir_name, filename) + arcname = os.path.join(dir_arcname, filename) + + f_tar.add(name=name, arcname=arcname, recursive=False) + + if os.path.isdir(name) and not os.path.islink(name): + add_directory_to_tarfile(f_tar, name, arcname) + + with open(base_name + '.tar.xz', 'wb') as f: + xz_context = lzma.LZMAFile(filename=f, mode='wb', preset=9) + with xz_context as f_xz: + with tarfile.TarFile(mode='w', fileobj=f_xz) as f_tar: + add_directory_to_tarfile(f_tar, root_dir, '.') + + def make_deterministic_gztar_archive(base_name, root_dir, time=1321009871.0): '''Make a gzipped tar archive of contents of 'root_dir'. 
From f5dbdf33b39efcacfadd1cf3c70d5d6f80caf9f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jard=C3=B3n?= Date: Wed, 4 May 2016 17:44:57 +0100 Subject: [PATCH 2/3] ybd/cache.py: Use make_xztar_archive() to compress the artifacts --- docs/dependencies.md | 2 +- install_dependencies.sh | 2 +- scripts/Dockerfile | 2 +- ybd/cache.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/dependencies.md b/docs/dependencies.md index 9001b00..8245344 100644 --- a/docs/dependencies.md +++ b/docs/dependencies.md @@ -27,7 +27,7 @@ if you trust the Python Package Index (PyPI) and pip is available on your machine, you can install these dependencies with: ``` - pip install fs pyyaml sandboxlib requests jsonschema bottle cherrypy riemann-client + pip install fs pyyaml sandboxlib requests jsonschema bottle cherrypy riemann-client backports.lzma ``` If you need to install pip itself: diff --git a/install_dependencies.sh b/install_dependencies.sh index 2286536..79e106f 100755 --- a/install_dependencies.sh +++ b/install_dependencies.sh @@ -44,6 +44,6 @@ if [ $? 
-ne 0 ]; then sudo rm get-pip.py fi -sudo pip install fs pyyaml sandboxlib requests +sudo pip install fs pyyaml sandboxlib requests backports.lzma sudo pip install jsonschema bottle cherrypy riemann-client sudo pip install pep8 diff --git a/scripts/Dockerfile b/scripts/Dockerfile index 757b523..d3c07c5 100644 --- a/scripts/Dockerfile +++ b/scripts/Dockerfile @@ -10,4 +10,4 @@ RUN wget https://bootstrap.pypa.io/get-pip.py && \ python get-pip.py && rm get-pip.py # install python dependencies -RUN pip install fs pyyaml sandboxlib requests jsonschema bottle cherrypy +RUN pip install fs pyyaml sandboxlib requests jsonschema bottle cherrypy backports.lzma diff --git a/ybd/cache.py b/ybd/cache.py index 84ece24..2f831b0 100644 --- a/ybd/cache.py +++ b/ybd/cache.py @@ -134,8 +134,8 @@ def cache(defs, this): shutil.move('%s.tar' % cachefile, cachefile) else: utils.set_mtime_recursively(this['install']) - utils.make_deterministic_gztar_archive(cachefile, this['install']) - shutil.move('%s.tar.gz' % cachefile, cachefile) + utils.make_xztar_archive(cachefile, this['install']) + shutil.move('%s.tar.xz' % cachefile, cachefile) app.config['counter'].increment() From 919930f2735024f95a1265e72c55a3ba5e484af7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jard=C3=B3n?= Date: Wed, 4 May 2016 17:45:12 +0100 Subject: [PATCH 3/3] ybd/utils.py: Remove make_deterministic_gztar_archive() We are using lzma compression now --- ybd/utils.py | 37 ------------------------------------- 1 file changed, 37 deletions(-) diff --git a/ybd/utils.py b/ybd/utils.py index 8d036bf..0eac61d 100644 --- a/ybd/utils.py +++ b/ybd/utils.py @@ -14,7 +14,6 @@ # # =*= License: GPL-2 =*= -import gzip try: import lzma except ImportError: @@ -251,42 +250,6 @@ def add_directory_to_tarfile(f_tar, dir_name, dir_arcname): add_directory_to_tarfile(f_tar, root_dir, '.') -def make_deterministic_gztar_archive(base_name, root_dir, time=1321009871.0): - '''Make a gzipped tar archive of contents of 'root_dir'. 
- - This function takes extra steps to ensure the output is deterministic, - compared to shutil.make_archive(). First, it sorts the results of - os.listdir() to ensure the ordering of the files in the archive is the - same. Second, it sets a fixed timestamp and filename in the gzip header. - - As well as fixing https://bugs.python.org/issue24465, to make this function - redundant we would need to patch shutil.make_archive() so we could manually - set the timestamp and filename set in the gzip file header. - - ''' - # It's hard to implement this function by monkeypatching - # shutil.make_archive() because of the way the tarfile module includes the - # filename of the tarfile in the gzip header. So we have to reimplement - # shutil.make_archive(). - - def add_directory_to_tarfile(f_tar, dir_name, dir_arcname): - for filename in sorted(os.listdir(dir_name)): - name = os.path.join(dir_name, filename) - arcname = os.path.join(dir_arcname, filename) - - f_tar.add(name=name, arcname=arcname, recursive=False) - - if os.path.isdir(name) and not os.path.islink(name): - add_directory_to_tarfile(f_tar, name, arcname) - - with open(base_name + '.tar.gz', 'wb') as f: - gzip_context = gzip.GzipFile( - filename='', mode='wb', fileobj=f, mtime=time) - with gzip_context as f_gzip: - with tarfile.TarFile(mode='w', fileobj=f_gzip) as f_tar: - add_directory_to_tarfile(f_tar, root_dir, '.') - - def make_deterministic_tar_archive(base_name, root_dir): '''Make a tar archive of contents of 'root_dir'.